From 3a479a25adf18ac68ccb3e8bf021ba83919024f3 Mon Sep 17 00:00:00 2001 From: "systems-assistant[bot]" <221163467+systems-assistant[bot]@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:29:16 -0700 Subject: [PATCH] 8 bytes mem leak fix (#2764) * 8 bytes mem leak fix * Adding a missing free() * Clean up commented lines * Add strdup fail check, memory ownership info * Add strdup fail check, memory ownership info --------- Co-authored-by: PJAvinash Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com> Co-authored-by: Avinash <44542533+PJAvinash@users.noreply.github.com> Co-authored-by: systems-assistant[bot] --- projects/rccl/src/include/comm.h | 2 +- projects/rccl/src/init.cc | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/projects/rccl/src/include/comm.h b/projects/rccl/src/include/comm.h index 9556994064..694fee1083 100644 --- a/projects/rccl/src/include/comm.h +++ b/projects/rccl/src/include/comm.h @@ -747,7 +747,7 @@ struct ncclComm { int unroll; // custom collective [RCCL] bool enableCustColl; - // gfx name from hipDeviceProp_t [RCCL] + // gfx name from hipDeviceProp_t [RCCL] , Memory resource owned by comm allocated in ncclCommInitRankFunc char* archName; // multiProcessorCount from hipDeviceProp_t [RCCL] int cuCount; diff --git a/projects/rccl/src/init.cc b/projects/rccl/src/init.cc index def51b40ea..130dd60af6 100644 --- a/projects/rccl/src/init.cc +++ b/projects/rccl/src/init.cc @@ -612,6 +612,7 @@ skip_profiling: free(comm->topParentRanks); free(comm->topParentLocalRanks); free(comm->gproxyConn); + free(comm->archName); NCCLCHECK(ncclRegCleanup(comm)); @@ -2149,12 +2150,12 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) { CUDACHECKGOTO(hipGetDeviceProperties(&devProp, cudaDev), res, fail); cuCount = devProp.multiProcessorCount; - archName = (char*)malloc(strlen(devProp.gcnArchName) + 1); - if (archName == nullptr) { - WARN("Failed to allocate memory for architecture name"); 
+ archName = strdup(devProp.gcnArchName); + if (!archName) { + res = ncclSystemError; + WARN("strdup failed for architecture name"); goto fail; } - strcpy(archName, devProp.gcnArchName); timers[TIMER_INIT_KERNELS] = clockNano(); NCCLCHECK(ncclInitKernelsForDevice(cudaArch, maxSharedMem, &maxLocalSizeBytes));