From 75104df3b2c3c95b08c8c3f8d92cc9dcae015694 Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Fri, 31 May 2024 01:19:09 +0100 Subject: [PATCH] SWDEV-464648 - code and comment cleanups Change-Id: I5ba3f1bff500b3cd5903c2f441017735e688f83f [ROCm/clr commit: 8f42ad6aa3665fa29d1741f3b032282d5c3ae11a] --- .../hip/amd_detail/amd_device_functions.h | 4 +- projects/clr/hipamd/src/hip_embed_pch.sh | 2 - .../tests/ocltst/module/include/BaseTestImp.h | 4 +- .../module/runtime/OCLDeviceQueries.cpp | 123 ++++++------------ .../ocltst/module/runtime/OCLPerfCounters.cpp | 111 ---------------- .../ocltst/module/runtime/OCLPinnedMemory.cpp | 1 - projects/clr/rocclr/device/device.cpp | 28 ++-- projects/clr/rocclr/device/device.hpp | 4 +- .../clr/rocclr/device/pal/palcounters.cpp | 2 +- projects/clr/rocclr/device/pal/paldevice.cpp | 1 - .../clr/rocclr/device/pal/palsettings.cpp | 3 - projects/clr/rocclr/device/rocm/rocdevice.cpp | 9 +- .../clr/rocclr/device/rocm/rocsettings.cpp | 21 ++- 13 files changed, 71 insertions(+), 242 deletions(-) diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h index c4837ad64c..207d638aec 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -824,8 +824,8 @@ int __syncthreads_or(int predicate) PIPE_ID 7:6 Pipeline from which the wave was dispatched. CU_ID 11:8 Compute Unit the wave is assigned to. SH_ID 12 Shader Array (within an SE) the wave is assigned to. - SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a, gfx940-942 - 14:13 Shader Engine the wave is assigned to for Vega. + SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a + 14:13 Shader Engine the wave is assigned to for gfx940-942 TG_ID 19:16 Thread-group ID VM_ID 23:20 Virtual Memory ID QUEUE_ID 26:24 Queue from which this wave was dispatched. diff --git a/projects/clr/hipamd/src/hip_embed_pch.sh b/projects/clr/hipamd/src/hip_embed_pch.sh index 9b01da8726..6c92d43884 100755 --- a/projects/clr/hipamd/src/hip_embed_pch.sh +++ b/projects/clr/hipamd/src/hip_embed_pch.sh @@ -144,14 +144,12 @@ EOF set -x - # For gfx10/Navi devices $LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only --cuda-gpu-arch=gfx1030 -x hip $tmp/hip_pch.h -E >$tmp/pch_wave32.cui && cat $tmp/hip_macros.h >> $tmp/pch_wave32.cui && $LLVM_DIR/bin/clang -cc1 -O3 -emit-pch -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -std=c++17 -fgnuc-version=4.2.1 -o $tmp/hip_wave32.pch -x hip-cpp-output - <$tmp/pch_wave32.cui && - # For other devices $LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -x hip $tmp/hip_pch.h -E >$tmp/pch_wave64.cui && cat $tmp/hip_macros.h >> $tmp/pch_wave64.cui && diff --git a/projects/clr/opencl/tests/ocltst/module/include/BaseTestImp.h b/projects/clr/opencl/tests/ocltst/module/include/BaseTestImp.h index 47ff996893..6c8249b7a3 100644 --- a/projects/clr/opencl/tests/ocltst/module/include/BaseTestImp.h +++ b/projects/clr/opencl/tests/ocltst/module/include/BaseTestImp.h @@ -119,12 +119,12 @@ class BaseTestImp : public OCLTest { virtual void open(unsigned int test, char* units, double& conversion, unsigned int deviceId, unsigned int platformIndex) { - return open(test, "Tahiti", platformIndex); + return open(test, "", platformIndex); } virtual void open(unsigned int test, char* units, double& conversion, unsigned int deviceId) { - return open(test, "Tahiti", 0); + return open(test, "", 0); } virtual void run(void) = 0; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp index 6e507cbe3e..efcf86fe21 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceQueries.cpp @@ -27,6 +27,12 @@ #include "CL/cl.h" #include "CL/cl_ext.h" +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + struct AMDDeviceInfo { const char* targetName_; //!< Target name const char* machineTarget_; //!< Machine target @@ -42,85 +48,35 @@ struct AMDDeviceInfo { static const cl_uint Ki = 1024; static const AMDDeviceInfo DeviceInfo[] = { - /* CAL_TARGET_CAYMAN */ - {"Cayman", "cayman", 1, 16, 4, 256, 32 * Ki, 32, 5, 0}, - /* CAL_TARGET_TAHITI */ - {"Tahiti", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 6, 0}, - /* CAL_TARGET_PITCAIRN */ - {"Pitcairn", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 6, 0}, - /* CAL_TARGET_CAPEVERDE */ - {"Capeverde", "capeverde", 4, 16, 1, 256, 64 * Ki, 32, 6, 0}, - /* CAL_TARGET_DEVASTATOR */ - {"Devastator", "trinity", 1, 16, 4, 256, 32 * Ki, 32, 5, 0}, - /* CAL_TARGET_SCRAPPER */ - {"Scrapper", "trinity", 1, 16, 4, 256, 32 * Ki, 32, 5, 0}, - /* CAL_TARGET_OLAND */ {"Oland", "oland", 4, 16, 1, 256, 64 * Ki, 32, 6, 0}, - /* CAL_TARGET_BONAIRE */ - {"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* CAL_TARGET_SPECTRE */ - {"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 7, 1}, - /* CAL_TARGET_SPOOKY */ - {"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 7, 1}, - /* CAL_TARGET_KALINDI */ - {"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* CAL_TARGET_HAINAN */ - {"Hainan", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 6, 0}, - /* CAL_TARGET_HAWAII */ - {"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* CAL_TARGET_ICELAND */ - {"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_TONGA */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_MULLINS */ - {"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* CAL_TARGET_FIJI */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_CARRIZO */ - {"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_CARRIZO */ - {"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_Ellesmere */ - {"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_BAFFIN */ - {"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* ROCM Kaveri */ {"gfx700", "gfx700", 4, 16, 1, 256, 64 * Ki, 32, 7, 1}, - /* ROCM Hawaii */ {"gfx701", "gfx701", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* ROCM Kabini */ {"gfx703", "gfx703", 4, 16, 1, 256, 64 * Ki, 32, 7, 2}, - /* ROCM Iceland */ {"gfx800", "gfx800", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* ROCM Carrizo */ {"gfx801", "gfx801", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* ROCM Tonga */ {"gfx802", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* ROCM Fiji */ {"gfx803", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* Vega10 */ {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* CAL_TARGET_STONEY */ - {"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* CAL_TARGET_LEXA */ - {"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, 8, 0}, - /* Vega10_XNACK */ {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Raven */ {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Raven_XNACK */ {"gfx903", "gfx903", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Vega12 */ {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Vega12_XNACK */ {"gfx905", "gfx905", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Vega20 */ {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* Vega20_XNACK */ {"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* MI100 */ {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* MI200 */ {"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, - /* MI300 */ {"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, - /* MI300X */ {"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, - /* MI300X1*/ {"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, - /* Navi10 */ {"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, - /* Navi12 */ {"gfx1011", "gfx1011", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, - /* Navi14 */ {"gfx1012", "gfx1012", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, - /* Navi21 */ { "gfx1030", "gfx1030", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Navi22 */ { "gfx1031", "gfx1031", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Navi23 */ { "gfx1032", "gfx1032", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Van Gogh */ { "gfx1033", "gfx1033", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Navi24 */ { "gfx1034", "gfx1034", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Rembrandt */{ "gfx1035", "gfx1035", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Raphael */ { "gfx1036", "gfx1036", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, - /* Navi31*/ { "gfx1100", "gfx1100", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, - /* Navi32*/ { "gfx1101", "gfx1101", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, - /* Navi33*/ { "gfx1102", "gfx1102", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, - /* Phoenix */ { "gfx1103", "gfx1103", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, - { "gfx1200", "gfx1200", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 }, - { "gfx1201", "gfx1201", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 }, + {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx903", "gfx903", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx905", "gfx905", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0}, + {"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, + {"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, + {"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4}, + {"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, + {"gfx1011", "gfx1011", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, + {"gfx1012", "gfx1012", 4, 32, 1, 256, 64 * Ki, 32, 10, 1}, + {"gfx1030", "gfx1030", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1031", "gfx1031", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1032", "gfx1032", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1033", "gfx1033", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1034", "gfx1034", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1035", "gfx1035", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1036", "gfx1036", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 }, + {"gfx1100", "gfx1100", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, + {"gfx1101", "gfx1101", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, + {"gfx1102", "gfx1102", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, + {"gfx1103", "gfx1103", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 }, + {"gfx1200", "gfx1200", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 }, + {"gfx1201", "gfx1201", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 }, }; const int DeviceInfoSize = sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); @@ -178,7 +134,14 @@ void OCLDeviceQueries::open(unsigned int test, char* units, double& conversion, break; } } - CHECK_RESULT(deviceFound != true, "Device %s is not supported", name); + + if (!deviceFound) { + char msg[256]; + SNPRINTF(msg, sizeof(msg), "Unsupported device(%s) for the test!\t", + name); + testDescString = msg; + return; + } error_ = _wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPerfCounters.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPerfCounters.cpp index 84ac050c77..8cf08399d1 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPerfCounters.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPerfCounters.cpp @@ -115,108 +115,6 @@ static const DeviceCounterInfo DeviceInfo[]{ 9, {{14, 0, 4}, {97, 1, 2}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {MCVML2_l, // reg 0, BigK bank 0 hits} - // Sea Islands, GFX8 - {"Bonaire", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Hawaii", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Maui", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Casper", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Spectre", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Slimer", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Spooky", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Kalindi", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Mullins", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Iceland", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Tonga", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Bermuda", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Fiji", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Carrizo", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Ellesmere", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Baffin", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Stoney", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"gfx804", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"gfx803", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Bristol Ridge", - 0, - {{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - // Southern Islands - {"Tahiti", - 0, - {{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Pitcairn", - 0, - {{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Capeverde", - 0, - {{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Oland", - 0, - {{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} - {"Hainan", - 0, - {{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0, - // GRBM_PERF_SEL_CP_BUSY} }; const int DeviceCounterSize = sizeof(DeviceInfo) / sizeof(DeviceCounterInfo); @@ -695,15 +593,6 @@ void OCLPerfCounters::run(void) { deviceName[idx] = '\0'; } - // Begin: to be removed when crash on Kabini is fixed - if (strcmp(deviceName, "Kalindi") == 0) { - char msg[256]; - SNPRINTF(msg, sizeof(msg), "Exiting as device is Kabini!\t"); - testDescString = msg; - return; - } - // End: to be removed when crash on Kabini is fixed - bool found = false; unsigned int devId = 0; for (int idx = 0; !found && idx < DeviceCounterSize; idx++) { diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp index cb59c9fbc8..b0ee1615d4 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp @@ -59,7 +59,6 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion, cl_int status; - // Observed failures with Carrizo on GSL path cl_bool is_apu; status = clGetDeviceInfo(devices_[deviceId], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), &is_apu, nullptr); diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 4e8dd9a0a2..330f56e15f 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -112,31 +112,25 @@ std::pair Isa::supportedIsas() { // Supported Version Features // SIMD/ SIMD Instr Bank LDS Mem // Target ID HSAIL ID ROC PAL Maj/Min/Stp SRAMECC XNACK CU Width Width Width Size Banks - {"gfx700", "Kaveri", true, false, 7, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Spectre, Spooky, Kalindi - {"gfx701", "Hawaii", true, false, 7, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaiipro - {"gfx702", "gfx702", true, false, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawiipro code) - {"gfx703", nullptr, false, false, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins - {"gfx704", "Bonaire", false, false, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx705", "Mullins", false, false, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari {"gfx801", nullptr, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx801:xnack-", nullptr, true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx801:xnack+", "Carrizo", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx802", "Tonga", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland - {"gfx803", "Fiji", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM - {"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro + {"gfx801:xnack+", "gfx801", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx802", "gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx803", "gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx810", nullptr, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx810:xnack-", nullptr, true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx810:xnack+", "Stoney", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland + {"gfx810:xnack+", "gfx810", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx900:xnack-", "gfx900", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx900:xnack+", "gfx901", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven + {"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx902:xnack-", "gfx902", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx902:xnack+", "gfx903", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega12 + {"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx904:xnack-", "gfx904", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx904:xnack+", "gfx905", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega20 + {"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx906:sramecc-", "gfx907", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx906:sramecc+", nullptr, true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx906:xnack-", "gfx906", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, @@ -154,7 +148,7 @@ std::pair Isa::supportedIsas() { {"gfx908:sramecc-:xnack+", nullptr, true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack-", nullptr, true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack+", nullptr, true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) + {"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx909:xnack-", nullptr, false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx909:xnack+", nullptr, false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a", nullptr, true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, @@ -193,7 +187,7 @@ std::pair Isa::supportedIsas() { {"gfx942:sramecc-:xnack+", nullptr, true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx942:sramecc+:xnack-", nullptr, true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx942:sramecc+:xnack+", nullptr, true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir + {"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 73e4f0bb60..8c8cab605f 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -657,11 +657,9 @@ class Settings : public amd::HeapObject { DeviceKernelArgs, //!< Device memory kernel arguments with no memory //!< ordering workaround (e.g. XGMI) DeviceKernelArgsReadback, //!< Device memory kernel arguments with kernel - //!< argument readback workaround (works only in - //!< ASICS >= MI200) + //!< argument readback workaround DeviceKernelArgsHDP //!< Device memory kernel arguments with kernel //!< argument readback plus HDP flush workaround. - //!< Works in all ASICS. Requires a valid hdp flush register }; uint64_t extensions_; //!< Supported OCL extensions diff --git a/projects/clr/rocclr/device/pal/palcounters.cpp b/projects/clr/rocclr/device/pal/palcounters.cpp index ad74fd7eff..d0968606b2 100644 --- a/projects/clr/rocclr/device/pal/palcounters.cpp +++ b/projects/clr/rocclr/device/pal/palcounters.cpp @@ -662,7 +662,7 @@ static constexpr std::array, 140> gfx10BlockIdPal = {{ {0x2B, 0}, // GCR - 136 {0x2C, 0}, // PH - 137 {0x2D, 0}, // UTCL1 - 138 - {0x31, 0}, // SqWgp - 139 For Navi3x + {0x31, 0}, // SqWgp - 139 }}; void PerfCounter::convertInfo() { diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index e0a692582e..fcbe91aea4 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -906,7 +906,6 @@ bool Device::create(Pal::IDevice* device) { static_cast(PAL_FORCE_ASIC_REVISION); // XNACK flag should be set for PageMigration or IOMMUv2 support. - // Note: Navi2x should have a fix in HW. bool isXNACKEnabled = (static_cast(properties().gpuMemoryProperties.flags.pageMigrationEnabled || properties().gpuMemoryProperties.flags.iommuv2Support)); diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index 45d16fad96..c11fe9d741 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -360,9 +360,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, // Report FP_FAST_FMA define if double precision HW reportFMA_ = true; - // FMA is 1/4 speed on Pitcairn, Cape Verde, Devastator and Scrapper - // Bonaire, Kalindi, Spectre and Spooky so disable - // FP_FMA_FMAF for those parts in switch below reportFMAF_ = true; if (doublePrecision_) { diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index cec9853ae2..05e7ff169d 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1244,7 +1244,6 @@ bool Device::populateOCLDeviceConstants() { return false; } - //TODO: add the assert statement for Raven if (!(isa().versionMajor() == 9 && isa().versionMinor() == 0 && isa().versionStepping() == 2)) { if (info_.maxEngineClockFrequency_ <= 0) { LogError("maxEngineClockFrequency_ is NOT positive!"); @@ -1511,7 +1510,6 @@ bool Device::populateOCLDeviceConstants() { ::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1); - // Enable OpenCL 2.0 for Vega10+ if (isa().versionMajor() >= 9) { info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " "; } else { @@ -1672,14 +1670,11 @@ bool Device::populateOCLDeviceConstants() { info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; } if (amd::IS_HIP) { - // Report atomics capability based on GFX IP, control on Hawaii if (info_.iommuv2_ || isa().versionMajor() >= 8) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } else if (!settings().useLightning_) { - // Report atomics capability based on GFX IP, control on Hawaii - // and Vega10. if (info_.iommuv2_ || (isa().versionMajor() == 8)) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } @@ -1874,8 +1869,8 @@ bool Device::populateOCLDeviceConstants() { } break; case (9): - if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) || // For gfx90a (MI200) - (isa().versionMinor() == 4)) { // For gfx94x (MI300) + if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) || + (isa().versionMinor() == 4)) { info_.vgprAllocGranularity_ = 8; info_.vgprsPerSimd_ = 512; } else { diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index e1de3a1422..92b67fc86c 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -246,12 +246,12 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH const uint32_t gfxipMinor = isa.versionMinor(); const uint32_t gfxStepping = isa.versionStepping(); - const bool isMI300 = gfxipMajor == 9 && gfxipMinor == 4 && + const bool isGfx94x = gfxipMajor == 9 && gfxipMinor == 4 && (gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2); - const bool isMI200 = (gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10); - const bool isPreMI100 = + const bool isGfx90a = (gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10); + const bool isPreGfx908 = (gfxipMajor < 9) || ((gfxipMajor == 9) && (gfxipMinor == 0) && (gfxStepping < 8)); - const bool isNavi10 = + const bool isGfx101x = (gfxipMajor == 10) && ((gfxipMinor == 0) || (gfxipMinor == 1)); auto kernelArgImpl = KernelArgImpl::HostKernelArgs; @@ -262,21 +262,18 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH kernelArgImpl = KernelArgImpl::DeviceKernelArgs; } else if (hasValidHDPFlush) { // If the HDP flush register is valid implement the HDP flush to MMIO - // workaround. This does not work on gfx9 devices before MI100 or Navi10 - // devices - if (!(isPreMI100 || isNavi10)) { + // workaround. + if (!(isPreGfx908 || isGfx101x)) { kernelArgImpl = KernelArgImpl::DeviceKernelArgsHDP; } - } else if (isMI300 || isMI200) { + } else if (isGfx94x || isGfx90a) { // Implement the kernel argument readback workaround // (write all args -> sfence -> write last byte -> mfence -> read last byte) - // It works only on MI200 and MI300 because of the strict guarantee on - // ordering of stores in those ASICS kernelArgImpl = KernelArgImpl::DeviceKernelArgsReadback; } - // Enable device kernel args for MI300* for now - if (isMI300) { + // Enable device kernel args for gfx94x for now + if (isGfx94x) { kernel_arg_impl_ = kernelArgImpl; kernel_arg_opt_ = true; }