8 bytes mem leak fix (#2764)

* 8 bytes mem leak fix * Adding a missing free() * Clean up commented lines * Add strdup fail check, memory ownership info * Add strdup fail check, memory ownership info --------- Co-authored-by: PJAvinash <avinashindian2.0@gmail.com> Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com> Co-authored-by: Avinash <44542533+PJAvinash@users.noreply.github.com> Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
2026-01-27 08:29:16 -07:00
@@ -747,7 +747,7 @@ struct ncclComm {
  int unroll;
  // custom collective [RCCL]
  bool enableCustColl;
-  // gfx name from hipDeviceProp_t [RCCL]
+  // gfx name from hipDeviceProp_t [RCCL]; memory is owned by comm and allocated in ncclCommInitRankFunc
  char* archName;
  // multiProcessorCount from hipDeviceProp_t [RCCL]
  int cuCount;
@@ -612,6 +612,7 @@ skip_profiling:
  free(comm->topParentRanks);
  free(comm->topParentLocalRanks);
  free(comm->gproxyConn);
+  free(comm->archName);

  NCCLCHECK(ncclRegCleanup(comm));

@@ -2149,12 +2150,12 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {

  CUDACHECKGOTO(hipGetDeviceProperties(&devProp, cudaDev), res, fail);
  cuCount = devProp.multiProcessorCount;
-  archName = (char*)malloc(strlen(devProp.gcnArchName) + 1);
-  if (archName == nullptr) {
-    WARN("Failed to allocate memory for architecture name");
+  archName = strdup(devProp.gcnArchName);
+  if (!archName) {
+    res = ncclSystemError;
+    WARN("strdup failed for architecture name");
    goto fail;
  }
-  strcpy(archName, devProp.gcnArchName);

  timers[TIMER_INIT_KERNELS] = clockNano();
  NCCLCHECK(ncclInitKernelsForDevice(cudaArch, maxSharedMem, &maxLocalSizeBytes));