* Fix 8-byte memory leak

* Adding a missing free()

* Clean up commented lines

* Add strdup fail check, memory ownership info

* Add strdup fail check, memory ownership info

---------

Co-authored-by: PJAvinash <avinashindian2.0@gmail.com>
Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com>
Co-authored-by: Avinash <44542533+PJAvinash@users.noreply.github.com>
Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
Цей коміт міститься в:
systems-assistant[bot]
2026-01-27 08:29:16 -07:00
зафіксовано GitHub
джерело baf676f003
коміт 3a479a25ad
2 змінених файлів з 6 додано та 5 видалено
+1 -1
Переглянути файл
@@ -747,7 +747,7 @@ struct ncclComm {
int unroll;
// custom collective [RCCL]
bool enableCustColl;
// gfx name from hipDeviceProp_t [RCCL]
// gfx name from hipDeviceProp_t [RCCL] , Memory resource owned by comm allocated in ncclCommInitRankFunc
char* archName;
// multiProcessorCount from hipDeviceProp_t [RCCL]
int cuCount;
+5 -4
Переглянути файл
@@ -612,6 +612,7 @@ skip_profiling:
free(comm->topParentRanks);
free(comm->topParentLocalRanks);
free(comm->gproxyConn);
free(comm->archName);
NCCLCHECK(ncclRegCleanup(comm));
@@ -2149,12 +2150,12 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
CUDACHECKGOTO(hipGetDeviceProperties(&devProp, cudaDev), res, fail);
cuCount = devProp.multiProcessorCount;
archName = (char*)malloc(strlen(devProp.gcnArchName) + 1);
if (archName == nullptr) {
WARN("Failed to allocate memory for architecture name");
archName = strdup(devProp.gcnArchName);
if (!archName) {
res = ncclSystemError;
WARN("strdup failed for architecture name");
goto fail;
}
strcpy(archName, devProp.gcnArchName);
timers[TIMER_INIT_KERNELS] = clockNano();
NCCLCHECK(ncclInitKernelsForDevice(cudaArch, maxSharedMem, &maxLocalSizeBytes));