SWDEV-464648 - code and comment cleanups

Change-Id: I5ba3f1bff500b3cd5903c2f441017735e688f83f


[ROCm/clr commit: 8f42ad6aa3]
This commit is contained in:
Ioannis Assiouras
2024-05-31 01:19:09 +01:00
parent 407d1346f2
commit 75104df3b2
13 changed files with 71 additions and 242 deletions
@@ -824,8 +824,8 @@ int __syncthreads_or(int predicate)
PIPE_ID 7:6 Pipeline from which the wave was dispatched.
CU_ID 11:8 Compute Unit the wave is assigned to.
SH_ID 12 Shader Array (within an SE) the wave is assigned to.
SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a, gfx940-942
14:13 Shader Engine the wave is assigned to for Vega.
SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a
14:13 Shader Engine the wave is assigned to for gfx940-942
TG_ID 19:16 Thread-group ID
VM_ID 23:20 Virtual Memory ID
QUEUE_ID 26:24 Queue from which this wave was dispatched.
@@ -144,14 +144,12 @@ EOF
set -x
# For gfx10/Navi devices
$LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only --cuda-gpu-arch=gfx1030 -x hip $tmp/hip_pch.h -E >$tmp/pch_wave32.cui &&
cat $tmp/hip_macros.h >> $tmp/pch_wave32.cui &&
$LLVM_DIR/bin/clang -cc1 -O3 -emit-pch -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -std=c++17 -fgnuc-version=4.2.1 -o $tmp/hip_wave32.pch -x hip-cpp-output - <$tmp/pch_wave32.cui &&
# For other devices
$LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -x hip $tmp/hip_pch.h -E >$tmp/pch_wave64.cui &&
cat $tmp/hip_macros.h >> $tmp/pch_wave64.cui &&
@@ -119,12 +119,12 @@ class BaseTestImp : public OCLTest {
virtual void open(unsigned int test, char* units, double& conversion,
unsigned int deviceId, unsigned int platformIndex) {
return open(test, "Tahiti", platformIndex);
return open(test, "", platformIndex);
}
virtual void open(unsigned int test, char* units, double& conversion,
unsigned int deviceId) {
return open(test, "Tahiti", 0);
return open(test, "", 0);
}
virtual void run(void) = 0;
@@ -27,6 +27,12 @@
#include "CL/cl.h"
#include "CL/cl_ext.h"
#ifdef WIN_OS
#define SNPRINTF sprintf_s
#else
#define SNPRINTF snprintf
#endif
struct AMDDeviceInfo {
const char* targetName_; //!< Target name
const char* machineTarget_; //!< Machine target
@@ -42,85 +48,35 @@ struct AMDDeviceInfo {
static const cl_uint Ki = 1024;
static const AMDDeviceInfo DeviceInfo[] = {
/* CAL_TARGET_CAYMAN */
{"Cayman", "cayman", 1, 16, 4, 256, 32 * Ki, 32, 5, 0},
/* CAL_TARGET_TAHITI */
{"Tahiti", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 6, 0},
/* CAL_TARGET_PITCAIRN */
{"Pitcairn", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 6, 0},
/* CAL_TARGET_CAPEVERDE */
{"Capeverde", "capeverde", 4, 16, 1, 256, 64 * Ki, 32, 6, 0},
/* CAL_TARGET_DEVASTATOR */
{"Devastator", "trinity", 1, 16, 4, 256, 32 * Ki, 32, 5, 0},
/* CAL_TARGET_SCRAPPER */
{"Scrapper", "trinity", 1, 16, 4, 256, 32 * Ki, 32, 5, 0},
/* CAL_TARGET_OLAND */ {"Oland", "oland", 4, 16, 1, 256, 64 * Ki, 32, 6, 0},
/* CAL_TARGET_BONAIRE */
{"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* CAL_TARGET_SPECTRE */
{"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 7, 1},
/* CAL_TARGET_SPOOKY */
{"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 7, 1},
/* CAL_TARGET_KALINDI */
{"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* CAL_TARGET_HAINAN */
{"Hainan", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 6, 0},
/* CAL_TARGET_HAWAII */
{"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* CAL_TARGET_ICELAND */
{"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_TONGA */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_MULLINS */
{"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* CAL_TARGET_FIJI */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_CARRIZO */
{"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_CARRIZO */
{"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_Ellesmere */
{"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_BAFFIN */
{"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* ROCM Kaveri */ {"gfx700", "gfx700", 4, 16, 1, 256, 64 * Ki, 32, 7, 1},
/* ROCM Hawaii */ {"gfx701", "gfx701", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* ROCM Kabini */ {"gfx703", "gfx703", 4, 16, 1, 256, 64 * Ki, 32, 7, 2},
/* ROCM Iceland */ {"gfx800", "gfx800", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* ROCM Carrizo */ {"gfx801", "gfx801", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* ROCM Tonga */ {"gfx802", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* ROCM Fiji */ {"gfx803", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* Vega10 */ {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* CAL_TARGET_STONEY */
{"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* CAL_TARGET_LEXA */
{"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, 8, 0},
/* Vega10_XNACK */ {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Raven */ {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Raven_XNACK */ {"gfx903", "gfx903", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Vega12 */ {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Vega12_XNACK */ {"gfx905", "gfx905", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Vega20 */ {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* Vega20_XNACK */ {"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* MI100 */ {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* MI200 */ {"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
/* MI300 */ {"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
/* MI300X */ {"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
/* MI300X1*/ {"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
/* Navi10 */ {"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
/* Navi12 */ {"gfx1011", "gfx1011", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
/* Navi14 */ {"gfx1012", "gfx1012", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
/* Navi21 */ { "gfx1030", "gfx1030", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Navi22 */ { "gfx1031", "gfx1031", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Navi23 */ { "gfx1032", "gfx1032", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Van Gogh */ { "gfx1033", "gfx1033", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Navi24 */ { "gfx1034", "gfx1034", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Rembrandt */{ "gfx1035", "gfx1035", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Raphael */ { "gfx1036", "gfx1036", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
/* Navi31*/ { "gfx1100", "gfx1100", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
/* Navi32*/ { "gfx1101", "gfx1101", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
/* Navi33*/ { "gfx1102", "gfx1102", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
/* Phoenix */ { "gfx1103", "gfx1103", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
{ "gfx1200", "gfx1200", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 },
{ "gfx1201", "gfx1201", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 },
{"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx903", "gfx903", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx905", "gfx905", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx907", "gfx907", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx90a", "gfx90a", 4, 16, 1, 256, 64 * Ki, 32, 9, 0},
{"gfx940", "gfx940", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx941", "gfx941", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx942", "gfx942", 4, 16, 1, 256, 64 * Ki, 32, 9, 4},
{"gfx1010", "gfx1010", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
{"gfx1011", "gfx1011", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
{"gfx1012", "gfx1012", 4, 32, 1, 256, 64 * Ki, 32, 10, 1},
{"gfx1030", "gfx1030", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1031", "gfx1031", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1032", "gfx1032", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1033", "gfx1033", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1034", "gfx1034", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1035", "gfx1035", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1036", "gfx1036", 4, 32, 1, 256, 64 * Ki, 32, 10, 3 },
{"gfx1100", "gfx1100", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
{"gfx1101", "gfx1101", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
{"gfx1102", "gfx1102", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
{"gfx1103", "gfx1103", 4, 32, 1, 256, 64 * Ki, 32, 11, 0 },
{"gfx1200", "gfx1200", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 },
{"gfx1201", "gfx1201", 4, 32, 1, 256, 64 * Ki, 32, 12, 0 },
};
const int DeviceInfoSize = sizeof(DeviceInfo) / sizeof(AMDDeviceInfo);
@@ -178,7 +134,14 @@ void OCLDeviceQueries::open(unsigned int test, char* units, double& conversion,
break;
}
}
CHECK_RESULT(deviceFound != true, "Device %s is not supported", name);
if (!deviceFound) {
char msg[256];
SNPRINTF(msg, sizeof(msg), "Unsupported device(%s) for the test!\t",
name);
testDescString = msg;
return;
}
error_ = _wrapper->clGetDeviceInfo(devices_[deviceId],
CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD,
@@ -115,108 +115,6 @@ static const DeviceCounterInfo DeviceInfo[]{
9,
{{14, 0, 4}, {97, 1, 2}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {MCVML2_l,
// reg 0, BigK bank 0 hits}
// Sea Islands, GFX8
{"Bonaire",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Hawaii",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Maui",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Casper",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Spectre",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Slimer",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Spooky",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Kalindi",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Mullins",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Iceland",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Tonga",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Bermuda",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Fiji",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Carrizo",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Ellesmere",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Baffin",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Stoney",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"gfx804",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"gfx803",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Bristol Ridge",
0,
{{14, 0, 4}, {9, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
// Southern Islands
{"Tahiti",
0,
{{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Pitcairn",
0,
{{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Capeverde",
0,
{{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Oland",
0,
{{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
{"Hainan",
0,
{{10, 0, 4}, {5, 0, 3}}}, // {SQ, reg 0, SQ_PERF_SEL_WAVES}, {GRBM, reg 0,
// GRBM_PERF_SEL_CP_BUSY}
};
const int DeviceCounterSize = sizeof(DeviceInfo) / sizeof(DeviceCounterInfo);
@@ -695,15 +593,6 @@ void OCLPerfCounters::run(void) {
deviceName[idx] = '\0';
}
// Begin: to be removed when crash on Kabini is fixed
if (strcmp(deviceName, "Kalindi") == 0) {
char msg[256];
SNPRINTF(msg, sizeof(msg), "Exiting as device is Kabini!\t");
testDescString = msg;
return;
}
// End: to be removed when crash on Kabini is fixed
bool found = false;
unsigned int devId = 0;
for (int idx = 0; !found && idx < DeviceCounterSize; idx++) {
@@ -59,7 +59,6 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion,
cl_int status;
// Observed failures with Carrizo on GSL path
cl_bool is_apu;
status = clGetDeviceInfo(devices_[deviceId], CL_DEVICE_HOST_UNIFIED_MEMORY,
sizeof(cl_bool), &is_apu, nullptr);
+11 -17
View File
@@ -112,31 +112,25 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
// Supported Version Features
// SIMD/ SIMD Instr Bank LDS Mem
// Target ID HSAIL ID ROC PAL Maj/Min/Stp SRAMECC XNACK CU Width Width Width Size Banks
{"gfx700", "Kaveri", true, false, 7, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Spectre, Spooky, Kalindi
{"gfx701", "Hawaii", true, false, 7, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaiipro
{"gfx702", "gfx702", true, false, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawiipro code)
{"gfx703", nullptr, false, false, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins
{"gfx704", "Bonaire", false, false, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx705", "Mullins", false, false, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari
{"gfx801", nullptr, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx801:xnack-", nullptr, true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx801:xnack+", "Carrizo", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx802", "Tonga", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland
{"gfx803", "Fiji", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM
{"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro
{"gfx801:xnack+", "gfx801", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx802", "gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx803", "gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx810", nullptr, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx810:xnack-", nullptr, true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx810:xnack+", "Stoney", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland
{"gfx810:xnack+", "gfx810", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx900:xnack-", "gfx900", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx900:xnack+", "gfx901", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven
{"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx902:xnack-", "gfx902", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx902:xnack+", "gfx903", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega12
{"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx904:xnack-", "gfx904", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx904:xnack+", "gfx905", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega20
{"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx906:sramecc-", "gfx907", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx906:sramecc+", nullptr, true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx906:xnack-", "gfx906", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
@@ -154,7 +148,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx908:sramecc-:xnack+", nullptr, true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx908:sramecc+:xnack-", nullptr, true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx908:sramecc+:xnack+", nullptr, true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code)
{"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx909:xnack-", nullptr, false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx909:xnack+", nullptr, false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90a", nullptr, true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
@@ -193,7 +187,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx942:sramecc-:xnack+", nullptr, true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx942:sramecc+:xnack-", nullptr, true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx942:sramecc+:xnack+", nullptr, true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
+1 -3
View File
@@ -657,11 +657,9 @@ class Settings : public amd::HeapObject {
DeviceKernelArgs, //!< Device memory kernel arguments with no memory
//!< ordering workaround (e.g. XGMI)
DeviceKernelArgsReadback, //!< Device memory kernel arguments with kernel
//!< argument readback workaround (works only in
//!< ASICS >= MI200)
//!< argument readback workaround
DeviceKernelArgsHDP //!< Device memory kernel arguments with kernel
//!< argument readback plus HDP flush workaround.
//!< Works in all ASICS. Requires a valid hdp flush register
};
uint64_t extensions_; //!< Supported OCL extensions
@@ -662,7 +662,7 @@ static constexpr std::array<std::pair<int, int>, 140> gfx10BlockIdPal = {{
{0x2B, 0}, // GCR - 136
{0x2C, 0}, // PH - 137
{0x2D, 0}, // UTCL1 - 138
{0x31, 0}, // SqWgp - 139 For Navi3x
{0x31, 0}, // SqWgp - 139
}};
void PerfCounter::convertInfo() {
@@ -906,7 +906,6 @@ bool Device::create(Pal::IDevice* device) {
static_cast<Pal::AsicRevision>(PAL_FORCE_ASIC_REVISION);
// XNACK flag should be set for PageMigration or IOMMUv2 support.
// Note: Navi2x should have a fix in HW.
bool isXNACKEnabled =
(static_cast<uint>(properties().gpuMemoryProperties.flags.pageMigrationEnabled ||
properties().gpuMemoryProperties.flags.iommuv2Support));
@@ -360,9 +360,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
// Report FP_FAST_FMA define if double precision HW
reportFMA_ = true;
// FMA is 1/4 speed on Pitcairn, Cape Verde, Devastator and Scrapper
// Bonaire, Kalindi, Spectre and Spooky so disable
// FP_FMA_FMAF for those parts in switch below
reportFMAF_ = true;
if (doublePrecision_) {
@@ -1244,7 +1244,6 @@ bool Device::populateOCLDeviceConstants() {
return false;
}
//TODO: add the assert statement for Raven
if (!(isa().versionMajor() == 9 && isa().versionMinor() == 0 && isa().versionStepping() == 2)) {
if (info_.maxEngineClockFrequency_ <= 0) {
LogError("maxEngineClockFrequency_ is NOT positive!");
@@ -1511,7 +1510,6 @@ bool Device::populateOCLDeviceConstants() {
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
// Enable OpenCL 2.0 for Vega10+
if (isa().versionMajor() >= 9) {
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " ";
} else {
@@ -1672,14 +1670,11 @@ bool Device::populateOCLDeviceConstants() {
info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
}
if (amd::IS_HIP) {
// Report atomics capability based on GFX IP, control on Hawaii
if (info_.iommuv2_ || isa().versionMajor() >= 8) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
else if (!settings().useLightning_) {
// Report atomics capability based on GFX IP, control on Hawaii
// and Vega10.
if (info_.iommuv2_ || (isa().versionMajor() == 8)) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
@@ -1874,8 +1869,8 @@ bool Device::populateOCLDeviceConstants() {
}
break;
case (9):
if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) || // For gfx90a (MI200)
(isa().versionMinor() == 4)) { // For gfx94x (MI300)
if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) ||
(isa().versionMinor() == 4)) {
info_.vgprAllocGranularity_ = 8;
info_.vgprsPerSimd_ = 512;
} else {
@@ -246,12 +246,12 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH
const uint32_t gfxipMinor = isa.versionMinor();
const uint32_t gfxStepping = isa.versionStepping();
const bool isMI300 = gfxipMajor == 9 && gfxipMinor == 4 &&
const bool isGfx94x = gfxipMajor == 9 && gfxipMinor == 4 &&
(gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2);
const bool isMI200 = (gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10);
const bool isPreMI100 =
const bool isGfx90a = (gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10);
const bool isPreGfx908 =
(gfxipMajor < 9) || ((gfxipMajor == 9) && (gfxipMinor == 0) && (gfxStepping < 8));
const bool isNavi10 =
const bool isGfx101x =
(gfxipMajor == 10) && ((gfxipMinor == 0) || (gfxipMinor == 1));
auto kernelArgImpl = KernelArgImpl::HostKernelArgs;
@@ -262,21 +262,18 @@ void Settings::setKernelArgImpl(const amd::Isa& isa, bool isXgmi, bool hasValidH
kernelArgImpl = KernelArgImpl::DeviceKernelArgs;
} else if (hasValidHDPFlush) {
// If the HDP flush register is valid implement the HDP flush to MMIO
// workaround. This does not work on gfx9 devices before MI100 or Navi10
// devices
if (!(isPreMI100 || isNavi10)) {
// workaround.
if (!(isPreGfx908 || isGfx101x)) {
kernelArgImpl = KernelArgImpl::DeviceKernelArgsHDP;
}
} else if (isMI300 || isMI200) {
} else if (isGfx94x || isGfx90a) {
// Implement the kernel argument readback workaround
// (write all args -> sfence -> write last byte -> mfence -> read last byte)
// It works only on MI200 and MI300 because of the strict guarantee on
// ordering of stores in those ASICS
kernelArgImpl = KernelArgImpl::DeviceKernelArgsReadback;
}
// Enable device kernel args for MI300* for now
if (isMI300) {
// Enable device kernel args for gfx94x for now
if (isGfx94x) {
kernel_arg_impl_ = kernelArgImpl;
kernel_arg_opt_ = true;
}