From 1e5ca6820be9a4c5dee92f943f3ebb59431b5dbd Mon Sep 17 00:00:00 2001
From: Nilesh M Negi
Date: Sat, 28 Oct 2023 12:30:36 -0500
Subject: [PATCH] Fix gcnArchName bug in topology dump (#937)

Signed-off-by: nileshnegi
---
 src/graph/topo.cc      |  2 +-
 src/graph/xml.cc       |  2 +-
 src/include/archinfo.h |  2 +-
 src/misc/archinfo.cc   | 16 ++++++++--------
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/graph/topo.cc b/src/graph/topo.cc
index 588f9609b0..dd6d01d3c0 100644
--- a/src/graph/topo.cc
+++ b/src/graph/topo.cc
@@ -372,7 +372,7 @@ ncclResult_t ncclTopoAddGpu(struct ncclXmlNode* xmlGpu, struct ncclTopoSystem* s
   const char* gcnArch;
   const char* gcnArchName;
   NCCLCHECK(xmlGetAttr(xmlGpu, "gcn", &gcnArch));
-  convertGcnArchToGcnArchName(gcnArch, gcnArchName);
+  convertGcnArchToGcnArchName(gcnArch, &gcnArchName);
   gpu->gpu.gcn = strdup(gcnArchName);
   rcclHipDeviceArch_t arch;
   NCCLCHECK(xmlGetAttrInt(xmlGpu, "arch", &arch.value));
diff --git a/src/graph/xml.cc b/src/graph/xml.cc
index e68aa092f0..902477a275 100644
--- a/src/graph/xml.cc
+++ b/src/graph/xml.cc
@@ -650,7 +650,7 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, uint32_t rocmDev
     NCCLCHECK(xmlSetAttr(gpuNode, "gcn", gcn));
   }
   NCCLCHECK(xmlGetAttr(gpuNode, "gcn", &gcn));
-  convertGcnArchToGcnArchName(gcn, gcnArchName);
+  convertGcnArchToGcnArchName(gcn, &gcnArchName);
   NCCLCHECK(xmlSetAttr(gpuNode, "gcn", gcnArchName));
 
   rcclHipDeviceArch_t arch;
diff --git a/src/include/archinfo.h b/src/include/archinfo.h
index 3fa4cb47bd..f75613536f 100644
--- a/src/include/archinfo.h
+++ b/src/include/archinfo.h
@@ -31,7 +31,7 @@ THE SOFTWARE.
 */
 
 void GcnArchNameFormat(char *gcnArchName, char* out);
-void convertGcnArchToGcnArchName(const char* gcnArch, const char* gcnArchName);
+void convertGcnArchToGcnArchName(const char* gcnArch, const char** gcnArchName);
 int GetGcnArchName(int deviceId, char* out);
 double GetDeviceWallClockRateInKhz(int deviceId);
 bool IsArchMatch(char const* arch, char const* target);
diff --git a/src/misc/archinfo.cc b/src/misc/archinfo.cc
index 96b3bd5f25..3cdb5df1e6 100644
--- a/src/misc/archinfo.cc
+++ b/src/misc/archinfo.cc
@@ -32,24 +32,24 @@ void GcnArchNameFormat(char* gcnArchName, char* out) {
   strcpy(out, gcnArchNameToken);
 }
 
-void convertGcnArchToGcnArchName(const char* gcnArch, const char* gcnArchName) {
+void convertGcnArchToGcnArchName(const char* gcnArch, const char** gcnArchName) {
   // gcnArch is deprecated and we should instead use gcnArchName; however, some data files still have
   // the older gcnArch value. There's only a handful of architectures that were coded prior to deprecation,
   // so we handle those cases here.
   if (strcmp(gcnArch, "906") == 0)
-    gcnArchName = "gfx906";
+    *gcnArchName = "gfx906";
   else if (strcmp(gcnArch, "908") == 0)
-    gcnArchName = "gfx908";
+    *gcnArchName = "gfx908";
   else if (strcmp(gcnArch, "910") == 0)
-    gcnArchName = "gfx90a";
+    *gcnArchName = "gfx90a";
   else if (strcmp(gcnArch, "940") == 0)
-    gcnArchName = "gfx940";
+    *gcnArchName = "gfx940";
   else if (strcmp(gcnArch, "941") == 0)
-    gcnArchName = "gfx941";
+    *gcnArchName = "gfx941";
   else if (strcmp(gcnArch, "942") == 0)
-    gcnArchName = "gfx942";
+    *gcnArchName = "gfx942";
   else
-    gcnArchName = gcnArch;
+    *gcnArchName = gcnArch;
 }
 
 int GetGcnArchName(int deviceId, char* out) {