From d6006f042556c86eab6e6d15a603dba07928fb62 Mon Sep 17 00:00:00 2001 From: Avinash <44542533+PJAvinash@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:25:56 -0500 Subject: [PATCH] Memory leak fixes in hostside functions (#1388) memory leak fixes for parseRome4P2H and ncclTopoAddGPU --- src/graph/rome_models.cc | 6 +++++- src/graph/topo.cc | 3 ++- src/graph/topo.h | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/graph/rome_models.cc b/src/graph/rome_models.cc index f2b7c0e7ab..ac805fe518 100644 --- a/src/graph/rome_models.cc +++ b/src/graph/rome_models.cc @@ -1391,12 +1391,16 @@ static bool checkOption(const char *options, const char *name) { numTokens++; while (tokens[numTokens-1] != NULL && numTokens < MAX_OPT_TOKENS) tokens[numTokens++] = strtok_r(NULL, "=, ", &state); + + bool result = false; for (int i = 0; i < numTokens/2; i++) { if (strcmp(tokens[i*2], name) == 0) { - return (bool)atol(tokens[i*2+1]); + result = (bool)atol(tokens[i*2+1]); + break; } } free(str_temp); + return result; } return false; } diff --git a/src/graph/topo.cc b/src/graph/topo.cc index 7529636d10..20f97463a6 100644 --- a/src/graph/topo.cc +++ b/src/graph/topo.cc @@ -400,7 +400,8 @@ ncclResult_t ncclTopoAddGpu(struct ncclXmlNode* xmlGpu, struct ncclTopoSystem* s const char* gcnArchName; NCCLCHECK(xmlGetAttr(xmlGpu, "gcn", &gcnArch)); convertGcnArchToGcnArchName(gcnArch, &gcnArchName); - gpu->gpu.gcn = strdup(gcnArchName); + strncpy(gpu->gpu.gcn, gcnArchName, GCN_ARCH_NAME_LEN-1); + gpu->gpu.gcn[GCN_ARCH_NAME_LEN-1] = '\0'; rcclHipDeviceArch_t arch; NCCLCHECK(xmlGetAttrInt(xmlGpu, "arch", &arch.value)); memcpy(&gpu->gpu.arch, &arch.arch, sizeof(hipDeviceArch_t)); diff --git a/src/graph/topo.h b/src/graph/topo.h index a409fc24ba..6f5ad54272 100644 --- a/src/graph/topo.h +++ b/src/graph/topo.h @@ -121,6 +121,8 @@ struct ncclTopoLinkList { #define RCCL_TOPO_FORCE_INTRA 16 #define RCCL_TOPO_XGMI_ALL 32 +#define GCN_ARCH_NAME_LEN 16 + struct ncclTopoNode { int type; int64_t id; @@ -131,7 +133,7 @@ struct ncclTopoNode { int rank; int cudaCompCap; int gdrSupport; - const char* gcn; + char gcn[GCN_ARCH_NAME_LEN]; hipDeviceArch_t arch; int cu; }gpu;