diff --git a/projects/rdc/example/diagnostic_example.cc b/projects/rdc/example/diagnostic_example.cc index e1d1c0c730..9acd7bc65d 100644 --- a/projects/rdc/example/diagnostic_example.cc +++ b/projects/rdc/example/diagnostic_example.cc @@ -137,7 +137,7 @@ int main(int, char**) { std::cout << " ============== Run individual diagnostic test ===========\n"; rdc_diag_test_result_t test_result; result = - rdc_test_case_run(rdc_handle, group_id, RDC_DIAG_COMPUTE_PROCESS, nullptr, 0, &test_result); + rdc_test_case_run(rdc_handle, group_id, RDC_DIAG_COMPUTE_PROCESS, nullptr, 0, &test_result, nullptr); if (result != RDC_ST_OK) { std::cout << "Error run RDC_DIAG_COMPUTE_PROCESS diagnostic. Return: " diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index ab85e94e63..e61fa64433 100644 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -556,6 +556,12 @@ typedef struct { rdc_diag_test_result_t diag_info[MAX_TEST_CASES]; } rdc_diag_response_t; +typedef void (*rdc_callback_t)(void*, void*); +typedef struct { + rdc_callback_t callback; //!< Callback sends logs for running diagnostics + void* cookie; //!< Cookie is used to identify different callbacks and supply them with data +} rdc_diag_callback_t; + /** * @brief The policy type to support */ @@ -1087,11 +1093,13 @@ rdc_status_t rdc_field_unwatch(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_ * * @param[inout] response The detail results of the tests run. * + * @param[inout] callback Callback for realtime communication + * * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_diagnostic_run(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response); + rdc_diag_response_t* response, rdc_diag_callback_t* callback); /** * @brief Run one diagnostic test case @@ -1110,11 +1118,14 @@ rdc_status_t rdc_diagnostic_run(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group * * @param[inout] result The results of the test. * + * @param[inout] callback Callback for realtime communication + * * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_test_case_run(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, - size_t config_size, rdc_diag_test_result_t* result); + size_t config_size, rdc_diag_test_result_t* result, + rdc_diag_callback_t* callback); /** * @brief Get a description of a provided RDC error status diff --git a/projects/rdc/include/rdc_lib/RdcDiagnostic.h b/projects/rdc/include/rdc_lib/RdcDiagnostic.h index 1ea94a5ae2..feb7ad2dd2 100644 --- a/projects/rdc/include/rdc_lib/RdcDiagnostic.h +++ b/projects/rdc/include/rdc_lib/RdcDiagnostic.h @@ -39,12 +39,12 @@ class RdcDiagnostic { virtual rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) = 0; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) = 0; // Run multiple test cases virtual rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) = 0; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) = 0; virtual rdc_status_t rdc_diag_init(uint64_t flags) = 0; virtual rdc_status_t rdc_diag_destroy() = 0; diff --git a/projects/rdc/include/rdc_lib/RdcDiagnosticLibInterface.h b/projects/rdc/include/rdc_lib/RdcDiagnosticLibInterface.h index 09ac8c8ac9..4501c318ce 100644 --- a/projects/rdc/include/rdc_lib/RdcDiagnosticLibInterface.h +++ b/projects/rdc/include/rdc_lib/RdcDiagnosticLibInterface.h @@ -38,7 +38,7 @@ rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result); + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback); rdc_status_t rdc_diag_init(uint64_t flags); diff --git a/projects/rdc/include/rdc_lib/RdcHandler.h b/projects/rdc/include/rdc_lib/RdcHandler.h index 164eae25eb..a19b71a63c 100644 --- a/projects/rdc/include/rdc_lib/RdcHandler.h +++ b/projects/rdc/include/rdc_lib/RdcHandler.h @@ -80,11 +80,11 @@ class RdcHandler { // Diagnostic API virtual rdc_status_t rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) = 0; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) = 0; virtual rdc_status_t rdc_test_case_run(rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, size_t config_size, - rdc_diag_test_result_t* result) = 0; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) = 0; // Control API virtual rdc_status_t rdc_field_update_all(uint32_t wait_for_update) = 0; diff --git a/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h b/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h index 4517844aa3..a58383fdf9 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h +++ b/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h @@ -27,6 +27,7 @@ THE SOFTWARE. #include #include +#include "rdc/rdc.h" #include "rdc_lib/RdcDiagnostic.h" #include "rdc_lib/RdcTelemetryLibInterface.h" @@ -42,11 +43,11 @@ class RdcDiagnosticModule : public RdcDiagnostic { rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diag_init(uint64_t flags) override; rdc_status_t rdc_diag_destroy() override; diff --git a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h index b4549f016c..c931c18021 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h @@ -84,10 +84,10 @@ class RdcEmbeddedHandler final : public RdcHandler { // Diagnostic API rdc_status_t rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_test_case_run(rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; // Control API rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override; diff --git a/projects/rdc/include/rdc_lib/impl/RdcRVSLib.h b/projects/rdc/include/rdc_lib/impl/RdcRVSLib.h index 07e8bcd7c3..45d88cd47b 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcRVSLib.h +++ b/projects/rdc/include/rdc_lib/impl/RdcRVSLib.h @@ -40,11 +40,11 @@ class RdcRVSLib : public RdcDiagnostic { rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diag_init(uint64_t flags) override; rdc_status_t rdc_diag_destroy() override; @@ -55,7 +55,8 @@ class RdcRVSLib : public RdcDiagnostic { private: RdcLibraryLoader lib_loader_; rdc_status_t (*test_case_run_)(rdc_diag_test_cases_t, uint32_t[RDC_MAX_NUM_DEVICES], uint32_t, - const char* config, size_t config_size, rdc_diag_test_result_t*); + const char* config, size_t config_size, rdc_diag_test_result_t*, + rdc_diag_callback_t*); rdc_status_t (*diag_test_cases_query_)(rdc_diag_test_cases_t[MAX_TEST_CASES], uint32_t*); rdc_status_t (*diag_init_)(uint64_t); rdc_status_t (*diag_destroy_)(); diff --git a/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h b/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h index b87d2528ad..0df9dfcfc7 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h +++ b/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h @@ -40,11 +40,11 @@ class RdcRocrLib : public RdcDiagnostic { rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diag_init(uint64_t flags) override; rdc_status_t rdc_diag_destroy() override; @@ -56,7 +56,7 @@ class RdcRocrLib : public RdcDiagnostic { private: RdcLibraryLoader lib_loader_; rdc_status_t (*test_case_run_)(rdc_diag_test_cases_t, uint32_t[RDC_MAX_NUM_DEVICES], uint32_t, - const char*, size_t, rdc_diag_test_result_t*); + const char*, size_t, rdc_diag_test_result_t*, rdc_diag_callback_t*); rdc_status_t (*diag_test_cases_query_)(rdc_diag_test_cases_t[MAX_TEST_CASES], uint32_t*); rdc_status_t (*diag_init_)(uint64_t); rdc_status_t (*diag_destroy_)(); diff --git a/projects/rdc/include/rdc_lib/impl/RdcSmiLib.h b/projects/rdc/include/rdc_lib/impl/RdcSmiLib.h index e439c95cac..fcddf54f37 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcSmiLib.h +++ b/projects/rdc/include/rdc_lib/impl/RdcSmiLib.h @@ -53,11 +53,11 @@ class RdcSmiLib : public RdcTelemetry, public RdcDiagnostic { rdc_status_t rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_diag_init(uint64_t flags) override; rdc_status_t rdc_diag_destroy() override; diff --git a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h index d6792432be..8c273bc8d0 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h @@ -28,6 +28,7 @@ THE SOFTWARE. #include #include "rdc.grpc.pb.h" // NOLINT +#include "rdc/rdc.h" #include "rdc_lib/RdcHandler.h" namespace amd { @@ -80,10 +81,10 @@ class RdcStandaloneHandler : public RdcHandler { // Diagnostic API rdc_status_t rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) override; + rdc_diag_response_t* response, rdc_diag_callback_t* callback) override; rdc_status_t rdc_test_case_run(rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, size_t config_size, - rdc_diag_test_result_t* result) override; + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) override; // Control RdcAPI rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override; diff --git a/projects/rdc/protos/rdc.proto b/projects/rdc/protos/rdc.proto index d857e3f1ad..b2749448d7 100755 --- a/projects/rdc/protos/rdc.proto +++ b/projects/rdc/protos/rdc.proto @@ -141,16 +141,18 @@ service RdcAPI { // rdc_diag_level_t level, // const char* config, // size_t config_size, - // rdc_diag_response_t* response); - rpc DiagnosticRun(DiagnosticRunRequest) returns (DiagnosticRunResponse) {} + // rdc_diag_response_t* response, + // rdc_diag_callback_t* callback); + rpc DiagnosticRun(DiagnosticRunRequest) returns (stream DiagnosticRunResponse) {} // rdc_status_t rdc_test_case_run( // rdc_gpu_group_t group_id, // rdc_diag_test_cases_t test_case, // const char* config, // size_t config_size, - // rdc_diag_test_result_t* result); - rpc DiagnosticTestCaseRun(DiagnosticTestCaseRunRequest) returns (DiagnosticTestCaseRunResponse) {} + // rdc_diag_test_result_t* result, + // rdc_diag_callback_t* callback); + rpc DiagnosticTestCaseRun(DiagnosticTestCaseRunRequest) returns (stream DiagnosticTestCaseRunResponse) {} //Just an RPC method not used as an API rpc GetMixedComponentVersion(GetMixedComponentVersionRequest) returns (GetMixedComponentVersionResponse) {} @@ -488,6 +490,7 @@ message DiagnosticResponse { message DiagnosticRunResponse { uint32 status = 1; DiagnosticResponse response = 2; + optional string log = 3; } message DiagnosticTestCaseRunRequest { @@ -510,6 +513,7 @@ message DiagnosticTestCaseRunRequest { message DiagnosticTestCaseRunResponse { uint32 status = 1; DiagnosticTestResult result = 2; + optional string log = 3; } message GetMixedComponentVersionRequest { diff --git a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc index b8a9ae2c6b..8cbcc01ef2 100644 --- a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc +++ b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc @@ -305,24 +305,24 @@ rdc_status_t rdc_group_field_destroy(rdc_handle_t p_rdc_handle, rdc_status_t rdc_diagnostic_run(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, rdc_diag_callback_t* callback) { if (!p_rdc_handle) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle) - ->rdc_diagnostic_run(group_id, level, config, config_size, response); + ->rdc_diagnostic_run(group_id, level, config, config_size, response, callback); } rdc_status_t rdc_test_case_run(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, - size_t config_size, rdc_diag_test_result_t* result) { + size_t config_size, rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (!p_rdc_handle) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle) - ->rdc_test_case_run(group_id, test_case, config, config_size, result); + ->rdc_test_case_run(group_id, test_case, config, config_size, result, callback); } rdc_status_t get_mixed_component_version(rdc_handle_t p_rdc_handle, mixed_component_t component, mixed_component_version_t* p_mixed_compv) { @@ -454,4 +454,4 @@ rdc_status_t rdc_policy_unregister(rdc_handle_t p_rdc_handle, rdc_gpu_group_t gr } return static_cast(p_rdc_handle) ->rdc_policy_unregister(group_id); -} \ No newline at end of file +} diff --git a/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc b/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc index 5516897692..5b3fa7d03b 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include "rdc/rdc.h" +#include "rdc_lib/RdcLogger.h" #include "rdc_lib/rdc_common.h" namespace amd { @@ -48,11 +49,9 @@ rdc_status_t RdcDiagnosticModule::rdc_diag_test_cases_query( return RDC_ST_OK; } -rdc_status_t RdcDiagnosticModule::rdc_test_case_run(rdc_diag_test_cases_t test_case, - uint32_t gpu_index[RDC_MAX_NUM_DEVICES], - uint32_t gpu_count, const char* config, - size_t config_size, - rdc_diag_test_result_t* result) { +rdc_status_t RdcDiagnosticModule::rdc_test_case_run( + rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, + const char* config, size_t config_size, rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (result == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -64,14 +63,17 @@ rdc_status_t RdcDiagnosticModule::rdc_test_case_run(rdc_diag_test_cases_t test_c strncpy_with_null(result->info, "Not implemented", MAX_DIAG_MSG_LENGTH); return RDC_ST_NOT_SUPPORTED; } - return ite->second->rdc_test_case_run(test_case, gpu_index, gpu_count, config, config_size, - result); + + rdc_status_t status = ite->second->rdc_test_case_run(test_case, gpu_index, gpu_count, config, + config_size, result, callback); + return status; } rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, + rdc_diag_callback_t* callback) { if (response == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -89,14 +91,29 @@ rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run(const rdc_group_info_t& gpu rdc_runs.push_back(RDC_DIAG_RVS_TEST); } + if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { + std::string log = "DiagnosticRun started"; + callback->callback(callback->cookie, log.data()); + } + response->results_count = 0; for (unsigned int i = 0; i < rdc_runs.size(); i++) { + if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { + std::string log = "Test " + std::to_string(i) + " / " + std::to_string(rdc_runs.size()); + callback->callback(callback->cookie, log.data()); + } response->diag_info[i].test_case = rdc_runs[i]; + // NOTE: rdc_test_case_run reuses the diagnostic_run callback rdc_test_case_run(rdc_runs[i], const_cast(gpus.entity_ids), gpus.count, config, - config_size, &(response->diag_info[i])); + config_size, &(response->diag_info[i]), callback); response->results_count++; } + if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { + std::string log = "DiagnosticRun finished"; + callback->callback(callback->cookie, log.data()); + } + return RDC_ST_OK; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc index 8cc3725d52..4f82cb1740 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc @@ -25,6 +25,7 @@ THE SOFTWARE. #include "amd_smi/amdsmi.h" #include "common/rdc_fields_supported.h" +#include "rdc/rdc.h" #include "rdc_lib/RdcException.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/RdcNotification.h" @@ -380,7 +381,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_status_t RdcEmbeddedHandler::rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, rdc_diag_callback_t* callback) { if (!response) { return RDC_ST_BAD_PARAMETER; } @@ -391,13 +392,13 @@ rdc_status_t RdcEmbeddedHandler::rdc_diagnostic_run(rdc_gpu_group_t group_id, if (status != RDC_ST_OK) return status; auto diag = rdc_module_mgr_->get_diagnostic_module(); - return diag->rdc_diagnostic_run(rdc_group_info, level, config, config_size, response); + return diag->rdc_diagnostic_run(rdc_group_info, level, config, config_size, response, callback); } rdc_status_t RdcEmbeddedHandler::rdc_test_case_run(rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, size_t config_size, - rdc_diag_test_result_t* result) { + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (!result) { return RDC_ST_BAD_PARAMETER; } @@ -408,7 +409,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_test_case_run(rdc_gpu_group_t group_id, auto diag = rdc_module_mgr_->get_diagnostic_module(); return diag->rdc_test_case_run(test_case, rdc_group_info.entity_ids, rdc_group_info.count, config, - config_size, result); + config_size, result, callback); } // Control API diff --git a/projects/rdc/rdc_libs/rdc/src/RdcRVSLib.cc b/projects/rdc/rdc_libs/rdc/src/RdcRVSLib.cc index cb82149501..7791f849a9 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcRVSLib.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcRVSLib.cc @@ -21,6 +21,7 @@ THE SOFTWARE. */ #include "rdc_lib/impl/RdcRVSLib.h" +#include "rdc/rdc.h" #include "rdc_lib/RdcLogger.h" namespace amd { @@ -90,7 +91,7 @@ rdc_status_t RdcRVSLib::rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cas rdc_status_t RdcRVSLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, - size_t config_size, rdc_diag_test_result_t* result) { + size_t config_size, rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (result == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -99,7 +100,7 @@ rdc_status_t RdcRVSLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, } rdc_status_t status = - test_case_run_(test_case, gpu_index, gpu_count, config, config_size, result); + test_case_run_(test_case, gpu_index, gpu_count, config, config_size, result, callback); RDC_LOG(RDC_DEBUG, "Run " << test_case << " test case from RVS: " << rdc_status_string(status) << " config[" << config_size << "]: " << config); return status; @@ -107,12 +108,13 @@ rdc_status_t RdcRVSLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, rdc_status_t RdcRVSLib::rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, rdc_diag_callback_t* callback) { (void)gpus; (void)level; (void)config; (void)config_size; (void)response; + (void)callback; return RDC_ST_NOT_SUPPORTED; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc b/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc index 2a6a65c690..a24a756b94 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc @@ -23,6 +23,7 @@ THE SOFTWARE. #include +#include "rdc/rdc.h" #include "rdc_lib/RdcException.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/rdc_common.h" @@ -94,7 +95,7 @@ rdc_status_t RdcRocrLib::rdc_diag_test_cases_query(rdc_diag_test_cases_t test_ca rdc_status_t RdcRocrLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, - size_t config_size, rdc_diag_test_result_t* result) { + size_t config_size, rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (result == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -103,19 +104,20 @@ rdc_status_t RdcRocrLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, } rdc_status_t status = - test_case_run_(test_case, gpu_index, gpu_count, config, config_size, result); + test_case_run_(test_case, gpu_index, gpu_count, config, config_size, result, callback); RDC_LOG(RDC_DEBUG, "Run " << test_case << " test case from Rocr: " << rdc_status_string(status)); return status; } rdc_status_t RdcRocrLib::rdc_diagnostic_run(const rdc_group_info_t& gpus, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, rdc_diag_callback_t* callback) { (void)gpus; (void)level; (void)config; (void)config_size; (void)response; + (void)callback; return RDC_ST_NOT_SUPPORTED; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc b/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc index 9ddb79787c..0fbfad6920 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc @@ -24,6 +24,7 @@ THE SOFTWARE. #include #include +#include "rdc/rdc.h" #include "rdc_lib/RdcLogger.h" namespace amd { @@ -203,7 +204,8 @@ rdc_status_t RdcSmiLib::rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cas rdc_status_t RdcSmiLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* /*config*/, - size_t /*config_size*/, rdc_diag_test_result_t* result) { + size_t /*config_size*/, rdc_diag_test_result_t* result, + rdc_diag_callback_t* callback) { if (result == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -220,7 +222,7 @@ rdc_status_t RdcSmiLib::rdc_test_case_run(rdc_diag_test_cases_t test_case, } rdc_status_t RdcSmiLib::rdc_diagnostic_run(const rdc_group_info_t&, rdc_diag_level_t, const char*, - size_t, rdc_diag_response_t*) { + size_t, rdc_diag_response_t*, rdc_diag_callback_t*) { return RDC_ST_NOT_SUPPORTED; } diff --git a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc index 3f523d5bf6..2ed54eaa97 100644 --- a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc +++ b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc @@ -26,6 +26,9 @@ THE SOFTWARE. #include #include "rdc.grpc.pb.h" // NOLINT +#include "rdc.pb.h" +#include "rdc/rdc.h" +#include "rdc_lib/RdcLogger.h" amd::rdc::RdcHandler* make_handler(const char* ip_and_port, const char* root_ca, const char* client_cert, const char* client_key) { @@ -553,7 +556,8 @@ rdc_status_t RdcStandaloneHandler::rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_status_t RdcStandaloneHandler::rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_diag_level_t level, const char* config, size_t config_size, - rdc_diag_response_t* response) { + rdc_diag_response_t* response, + rdc_diag_callback_t* /*callback*/) { if (!response) { return RDC_ST_BAD_PARAMETER; } @@ -566,40 +570,62 @@ rdc_status_t RdcStandaloneHandler::rdc_diagnostic_run(rdc_gpu_group_t group_id, request.set_config(config); request.set_config_size(config_size); - ::grpc::Status status = stub_->DiagnosticRun(&context, request, &reply); - rdc_status_t err_status = error_handle(status, reply.status()); - if (err_status != RDC_ST_OK) return err_status; - auto res = reply.response(); - response->results_count = res.results_count(); + auto reader = stub_->DiagnosticRun(&context, request); + // for the duration of the DiagnosticRun (multiple tests) - we're stuck in this loop + // + // there are 2 optional reply fields: + // * log - reports messages back during the diagnostic run + // * response - delivered when the diagnostic run completes + while (reader->Read(&reply)) { + if (reply.has_log()) { + // TODO: Remove cout? + RDC_LOG(RDC_INFO, "LOG: " << reply.log()); + std::cout << "LOG: " << reply.log() << std::endl; + continue; + } + if (reply.has_response()) { + RDC_LOG(RDC_DEBUG, "HAS RESPONSE!"); + auto res = reply.response(); + response->results_count = res.results_count(); - if (res.diag_info_size() > static_cast(MAX_TEST_CASES)) { - return RDC_ST_BAD_PARAMETER; + if (res.diag_info_size() > static_cast(MAX_TEST_CASES)) { + return RDC_ST_BAD_PARAMETER; + } + for (int i = 0; i < res.diag_info_size(); i++) { + const ::rdc::DiagnosticTestResult& result = res.diag_info(i); + rdc_diag_test_result_t& to_result = response->diag_info[i]; + to_result.status = static_cast(result.status()); + + // Set details + to_result.details.code = result.details().code(); + strncpy_with_null(to_result.details.msg, result.details().msg().c_str(), + MAX_DIAG_MSG_LENGTH); + + to_result.test_case = static_cast(result.test_case()); + to_result.per_gpu_result_count = result.per_gpu_result_count(); + + // Set Result details + if (result.gpu_results_size() > RDC_MAX_NUM_DEVICES) { + return RDC_ST_BAD_PARAMETER; + } + for (int j = 0; j < result.gpu_results_size(); j++) { + auto per_gpu_result = result.gpu_results(j); + rdc_diag_per_gpu_result_t& to_per_gpu = to_result.gpu_results[j]; + to_per_gpu.gpu_index = per_gpu_result.gpu_index(); + to_per_gpu.gpu_result.code = per_gpu_result.gpu_result().code(); + strncpy_with_null(to_per_gpu.gpu_result.msg, per_gpu_result.gpu_result().msg().c_str(), + MAX_DIAG_MSG_LENGTH); + } + strncpy_with_null(to_result.info, result.info().c_str(), MAX_DIAG_MSG_LENGTH); + } + } } - for (int i = 0; i < res.diag_info_size(); i++) { - const ::rdc::DiagnosticTestResult& result = res.diag_info(i); - rdc_diag_test_result_t& to_result = response->diag_info[i]; - to_result.status = static_cast(result.status()); - // Set details - to_result.details.code = result.details().code(); - strncpy_with_null(to_result.details.msg, result.details().msg().c_str(), MAX_DIAG_MSG_LENGTH); - - to_result.test_case = static_cast(result.test_case()); - to_result.per_gpu_result_count = result.per_gpu_result_count(); - - // Set Result details - if (result.gpu_results_size() > RDC_MAX_NUM_DEVICES) { - return RDC_ST_BAD_PARAMETER; - } - for (int j = 0; j < result.gpu_results_size(); j++) { - auto per_gpu_result = result.gpu_results(j); - rdc_diag_per_gpu_result_t& to_per_gpu = to_result.gpu_results[j]; - to_per_gpu.gpu_index = per_gpu_result.gpu_index(); - to_per_gpu.gpu_result.code = per_gpu_result.gpu_result().code(); - strncpy_with_null(to_per_gpu.gpu_result.msg, per_gpu_result.gpu_result().msg().c_str(), - MAX_DIAG_MSG_LENGTH); - } - strncpy_with_null(to_result.info, result.info().c_str(), MAX_DIAG_MSG_LENGTH); + auto status = reader->Finish(); + if (status.ok()) { + RDC_LOG(RDC_DEBUG, "reader status: success!"); + } else { + RDC_LOG(RDC_ERROR, "reader status: failure!"); } return RDC_ST_OK; @@ -608,7 +634,8 @@ rdc_status_t RdcStandaloneHandler::rdc_diagnostic_run(rdc_gpu_group_t group_id, rdc_status_t RdcStandaloneHandler::rdc_test_case_run(rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case, const char* config, size_t config_size, - rdc_diag_test_result_t* to_result) { + rdc_diag_test_result_t* to_result, + rdc_diag_callback_t* /*callback*/) { if (!to_result) { return RDC_ST_BAD_PARAMETER; } @@ -621,33 +648,45 @@ rdc_status_t RdcStandaloneHandler::rdc_test_case_run(rdc_gpu_group_t group_id, request.set_config_size(config_size); request.set_test_case(static_cast<::rdc::DiagnosticTestCaseRunRequest_TestCaseType>(test_case)); - ::grpc::Status status = stub_->DiagnosticTestCaseRun(&context, request, &reply); - rdc_status_t err_status = error_handle(status, reply.status()); - if (err_status != RDC_ST_OK) return err_status; - auto result = reply.result(); + auto reader = stub_->DiagnosticTestCaseRun(&context, request); + while (reader->Read(&reply)) { + if (!reply.has_result()) { + RDC_LOG(RDC_ERROR, "NO TEST_RUN RESULT!"); + continue; + } - to_result->status = static_cast(result.status()); + auto result = reply.result(); - // Set details - to_result->details.code = result.details().code(); - strncpy_with_null(to_result->details.msg, result.details().msg().c_str(), MAX_DIAG_MSG_LENGTH); + to_result->status = static_cast(result.status()); - to_result->test_case = static_cast(result.test_case()); - to_result->per_gpu_result_count = result.per_gpu_result_count(); + // Set details + to_result->details.code = result.details().code(); + strncpy_with_null(to_result->details.msg, result.details().msg().c_str(), MAX_DIAG_MSG_LENGTH); - // Set Result details - if (result.gpu_results_size() > RDC_MAX_NUM_DEVICES) { - return RDC_ST_BAD_PARAMETER; + to_result->test_case = static_cast(result.test_case()); + to_result->per_gpu_result_count = result.per_gpu_result_count(); + + // Set Result details + if (result.gpu_results_size() > RDC_MAX_NUM_DEVICES) { + return RDC_ST_BAD_PARAMETER; + } + for (int j = 0; j < result.gpu_results_size(); j++) { + auto per_gpu_result = result.gpu_results(j); + rdc_diag_per_gpu_result_t& to_per_gpu = to_result->gpu_results[j]; + to_per_gpu.gpu_index = per_gpu_result.gpu_index(); + to_per_gpu.gpu_result.code = per_gpu_result.gpu_result().code(); + strncpy_with_null(to_per_gpu.gpu_result.msg, per_gpu_result.gpu_result().msg().c_str(), + MAX_DIAG_MSG_LENGTH); + } + strncpy_with_null(to_result->info, result.info().c_str(), MAX_DIAG_MSG_LENGTH); } - for (int j = 0; j < result.gpu_results_size(); j++) { - auto per_gpu_result = result.gpu_results(j); - rdc_diag_per_gpu_result_t& to_per_gpu = to_result->gpu_results[j]; - to_per_gpu.gpu_index = per_gpu_result.gpu_index(); - to_per_gpu.gpu_result.code = per_gpu_result.gpu_result().code(); - strncpy_with_null(to_per_gpu.gpu_result.msg, per_gpu_result.gpu_result().msg().c_str(), - MAX_DIAG_MSG_LENGTH); + + auto status = reader->Finish(); + if (status.ok()) { + RDC_LOG(RDC_DEBUG, "reader status: success!"); + } else { + RDC_LOG(RDC_ERROR, "reader status: failure!"); } - strncpy_with_null(to_result->info, result.info().c_str(), MAX_DIAG_MSG_LENGTH); return RDC_ST_OK; } diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc index 7a95174b31..b58e73711c 100644 --- a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc @@ -20,10 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - -#include -#include +#include #include "rdc_lib/RdcDiagnosticLibInterface.h" #include "rdc_lib/rdc_common.h" @@ -152,7 +149,7 @@ static rdc_status_t run_compute_queue_test(uint32_t gpu_index, rdc_diag_test_res rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* /*config*/, size_t /*config_size*/, - rdc_diag_test_result_t* result) { + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { if (result == nullptr || gpu_count == 0) { return RDC_ST_BAD_PARAMETER; } @@ -170,6 +167,10 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, // Run test for each GPU. It will continue even // if one GPU test is fail. for (uint32_t i = 0; i < gpu_count; i++) { + if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { + std::string str = "ROCR test on GPU " + std::to_string(i); + callback->callback(callback->cookie, str.data()); + } switch (test_case) { case RDC_DIAG_SYS_MEM_CHECK: run_memory_test(gpu_index[i], result); diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc index 3c5850b4c2..876c6475f1 100644 --- a/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc @@ -57,7 +57,7 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, // TODO: use gpu_index uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, - rdc_diag_test_result_t* result) { + rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { rvs_status_t rvs_status = RVS_STATUS_SUCCESS; RVS_LOG(); if (result == nullptr || gpu_count == 0) { diff --git a/projects/rdc/rdci/src/RdciDiagSubSystem.cc b/projects/rdc/rdci/src/RdciDiagSubSystem.cc index a926b2ac13..078bc73d29 100644 --- a/projects/rdc/rdci/src/RdciDiagSubSystem.cc +++ b/projects/rdc/rdci/src/RdciDiagSubSystem.cc @@ -155,8 +155,9 @@ void RdciDiagSubSystem::process() { rdc_status_t result; rdc_diag_response_t response; + //rdc_diag_callback_t callback; result = rdc_diagnostic_run(rdc_handle_, group_id_, run_level_, config_test_.c_str(), - config_test_.length(), &response); + config_test_.length(), &response, nullptr); if (result != RDC_ST_OK) { std::string error_msg = rdc_status_string(result); diff --git a/projects/rdc/server/include/rdc/rdc_api_service.h b/projects/rdc/server/include/rdc/rdc_api_service.h index ef38f66cca..ec000f4d12 100644 --- a/projects/rdc/server/include/rdc/rdc_api_service.h +++ b/projects/rdc/server/include/rdc/rdc_api_service.h @@ -24,6 +24,8 @@ THE SOFTWARE. #include +#include + #include "rdc.grpc.pb.h" // NOLINT #include "rdc/rdc.h" @@ -121,13 +123,13 @@ class RdcAPIServiceImpl final : public ::rdc::RdcAPI::Service { ::grpc::Status RemoveAllJob(::grpc::ServerContext* context, const ::rdc::Empty* request, ::rdc::RemoveAllJobResponse* reply) override; - ::grpc::Status DiagnosticRun(::grpc::ServerContext* context, - const ::rdc::DiagnosticRunRequest* request, - ::rdc::DiagnosticRunResponse* reply) override; + ::grpc::Status DiagnosticRun( + ::grpc::ServerContext* context, const ::rdc::DiagnosticRunRequest* request, + ::grpc::ServerWriter< ::rdc::DiagnosticRunResponse>* writer) override; - ::grpc::Status DiagnosticTestCaseRun(::grpc::ServerContext* context, - const ::rdc::DiagnosticTestCaseRunRequest* request, - ::rdc::DiagnosticTestCaseRunResponse* reply) override; + ::grpc::Status DiagnosticTestCaseRun( + ::grpc::ServerContext* context, const ::rdc::DiagnosticTestCaseRunRequest* request, + ::grpc::ServerWriter< ::rdc::DiagnosticTestCaseRunResponse>* writer) override; ::grpc::Status GetMixedComponentVersion(::grpc::ServerContext* context, const ::rdc::GetMixedComponentVersionRequest* request, diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc index a88acbe290..ace5aa474c 100644 --- a/projects/rdc/server/src/rdc_api_service.cc +++ b/projects/rdc/server/src/rdc_api_service.cc @@ -23,6 +23,7 @@ THE SOFTWARE. #include #include +#include #include #include @@ -31,6 +32,7 @@ THE SOFTWARE. #include #include "rdc.grpc.pb.h" // NOLINT +#include "rdc.pb.h" #include "rdc/rdc.h" #include "rdc/rdc_private.h" #include "rdc/rdc_server_main.h" @@ -577,26 +579,55 @@ bool RdcAPIServiceImpl::copy_gpu_usage_info(const rdc_gpu_usage_info_t& src, return ::grpc::Status::OK; } -::grpc::Status RdcAPIServiceImpl::DiagnosticRun(::grpc::ServerContext* context, - const ::rdc::DiagnosticRunRequest* request, - ::rdc::DiagnosticRunResponse* reply) { +::grpc::Status RdcAPIServiceImpl::DiagnosticRun( + ::grpc::ServerContext* context, const ::rdc::DiagnosticRunRequest* request, + ::grpc::ServerWriter<::rdc::DiagnosticRunResponse>* writer) { (void)(context); - if (!reply || !request) { + if (!writer || !request) { return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); } + auto cb_lambda = [](void* w, void* m) -> void { + if (w == nullptr) { + RDC_LOG(RDC_ERROR, "BAD WRITER"); + return; + } + + if (m == nullptr) { + RDC_LOG(RDC_ERROR, "BAD MESSAGE"); + return; + } + + auto writer = static_cast<::grpc::ServerWriter<::rdc::DiagnosticRunResponse>*>(w); + char* message = static_cast(m); + + ::rdc::DiagnosticRunResponse reply; + + reply.set_log(std::string(message)); + if (!writer->Write(reply)) { + RDC_LOG(RDC_ERROR, "Failed to write log message"); + } + }; + + rdc_callback_t cb = cb_lambda; + rdc_diag_callback_t callback = { + cb, + writer, + }; + rdc_diag_response_t diag_response; rdc_status_t result = rdc_diagnostic_run( rdc_handle_, request->group_id(), static_cast(request->level()), const_cast(request->config().c_str()), static_cast(request->config().length()), - &diag_response); + &diag_response, &callback); - reply->set_status(result); + ::rdc::DiagnosticRunResponse reply; + reply.set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; } - ::rdc::DiagnosticResponse* to_response = reply->mutable_response(); + ::rdc::DiagnosticResponse* to_response = reply.mutable_response(); to_response->set_results_count(diag_response.results_count); for (uint32_t i = 0; i < diag_response.results_count; i++) { @@ -626,28 +657,61 @@ bool RdcAPIServiceImpl::copy_gpu_usage_info(const rdc_gpu_usage_info_t& src, to_diag_info->set_info(test_result.info); } + if (!writer->Write(reply)) { + return ::grpc::Status::CANCELLED; + } + return ::grpc::Status::OK; } ::grpc::Status RdcAPIServiceImpl::DiagnosticTestCaseRun( ::grpc::ServerContext* context, const ::rdc::DiagnosticTestCaseRunRequest* request, - ::rdc::DiagnosticTestCaseRunResponse* reply) { + ::grpc::ServerWriter<::rdc::DiagnosticTestCaseRunResponse>* writer) { (void)(context); - if (!reply || !request) { + if (!writer || !request) { return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); } + auto cb_lambda = [](void* w, void* m) -> void { + if (w == nullptr) { + RDC_LOG(RDC_ERROR, "BAD WRITER"); + return; + } + + if (m == nullptr) { + RDC_LOG(RDC_ERROR, "BAD MESSAGE"); + return; + } + + auto writer = static_cast<::grpc::ServerWriter<::rdc::DiagnosticTestCaseRunResponse>*>(w); + char* message = static_cast(m); + + ::rdc::DiagnosticTestCaseRunResponse reply; + + reply.set_log(std::string(message)); + if (!writer->Write(reply)) { + RDC_LOG(RDC_ERROR, "Failed to write log message"); + } + }; + + rdc_callback_t cb = cb_lambda; + rdc_diag_callback_t callback = { + cb, + writer, + }; + rdc_diag_test_result_t test_result; rdc_status_t result = rdc_test_case_run( rdc_handle_, request->group_id(), static_cast(request->test_case()), const_cast(request->config().c_str()), static_cast(request->config().length()), - &test_result); + &test_result, &callback); - reply->set_status(result); + ::rdc::DiagnosticTestCaseRunResponse reply; + reply.set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; } - ::rdc::DiagnosticTestResult* to_diag_info = reply->mutable_result(); + ::rdc::DiagnosticTestResult* to_diag_info = reply.mutable_result(); to_diag_info->set_status(test_result.status); // details @@ -670,6 +734,10 @@ bool RdcAPIServiceImpl::copy_gpu_usage_info(const rdc_gpu_usage_info_t& src, } to_diag_info->set_info(test_result.info); + if (!writer->Write(reply)) { + return ::grpc::Status::CANCELLED; + } + return ::grpc::Status::OK; }