SWDEV-419688 - Do not run GWS init kernel for targets > gfx12 and MI300.

Change-Id: I8e7441268978be71ab8a5a33e7f8bcf69660e500
(cherry picked from commit 36d37ef614909c0f215512aac0c133408d787080)
This commit is contained in:
kjayapra-amd
2023-09-11 15:54:46 -04:00
committed by Sourabh Betigeri
szülő 6398f604b0
commit 6a8bc3c718
7 fájl változott, egészen pontosan 39 új sor hozzáadva és 12 régi sor törölve
+17
Fájl megtekintése
@@ -118,6 +118,23 @@ const char* HipExtraSourceCode = BLIT_KERNELS(
}
);
const char* HipExtraSourceCodeNoGWS = BLIT_KERNELS(
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong sizeBytes) {
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
}
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong flags, ulong mask) {
__amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask);
}
__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks,
uint heap_size, uint number_of_initial_blocks) {
__ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks);
}
);
const char* BlitImageSourceCode = BLIT_KERNELS(
// Extern
extern void __amd_fillImage(__write_only image2d_array_t, float4, int4, uint4, int4, int4,
+2 -1
Fájl megtekintése
@@ -659,7 +659,8 @@ class Settings : public amd::HeapObject {
uint enableCoopMultiDeviceGroups_ : 1; //!< Enable cooperative groups multi device
uint fenceScopeAgent_ : 1; //!< Enable fence scope agent in AQL dispatch packet
uint rocr_backend_ : 1; //!< Device uses ROCr backend for submissions
uint reserved_ : 14;
uint gwsInitSupported_:1; //!< Check if GWS is supported on this machine.
uint reserved_ : 10;
};
uint value_;
};
+5
Fájl megtekintése
@@ -2785,6 +2785,11 @@ bool KernelBlitManager::RunGwsInit(
uint32_t value) const {
amd::ScopedLock k(lockXferOps_);
if (dev().settings().gwsInitSupported_ == false) {
LogError("GWS Init is not supported on this target");
return false;
}
size_t globalWorkOffset[1] = { 0 };
size_t globalWorkSize[1] = { 1 };
size_t localWorkSize[1] = { 1 };
+7 -1
Fájl megtekintése
@@ -96,6 +96,7 @@ bool getValueFromIsaMeta(amd_comgr_metadata_node_t& isaMeta, const char* key,
namespace device {
extern const char* HipExtraSourceCode;
extern const char* HipExtraSourceCodeNoGWS;
} // namespace device
namespace roc {
@@ -847,7 +848,11 @@ bool Device::createBlitProgram() {
#if defined(USE_COMGR_LIBRARY)
if (settings().useLightning_) {
if (amd::IS_HIP) {
extraKernel = device::HipExtraSourceCode;
if (settings().gwsInitSupported_) {
extraKernel = device::HipExtraSourceCode;
} else {
extraKernel = device::HipExtraSourceCodeNoGWS;
}
} else {
extraKernel = SchedulerSourceCode;
}
@@ -1752,6 +1757,7 @@ bool Device::populateOCLDeviceConstants() {
info_.sgprsPerSimd_ =
std::numeric_limits<uint32_t>::max(); // gfx10+ does not share SGPRs between waves
}
return true;
}
+6 -7
Fájl megtekintése
@@ -92,9 +92,9 @@ Settings::Settings() {
// Use coarse grain system memory for kernel arguments by default (to keep GPU cache)
fgs_kernel_arg_ = false;
// by default for asics < gfx940 old single grid sync path is followed
coop_sync_ = false;
barrier_value_packet_ = false;
gwsInitSupported_ = true;
}
// ================================================================================================
@@ -173,13 +173,12 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor
enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE;
}
// No GWS init kernel necessary for these archs
if ((gfxipMajor == 9 && gfxipMinor == 4) || gfxipMajor >= 11) {
coop_sync_ = true;
}
lcWavefrontSize64_ = !enableWave32Mode_;
if (gfxipMajor > 10 || (gfxipMajor == 9 && gfxipMinor == 4)) {
gwsInitSupported_ = false;
}
// Override current device settings
override();
+1 -2
Fájl megtekintése
@@ -51,9 +51,8 @@ class Settings : public device::Settings {
uint cpu_wait_for_signal_ : 1; //!< Wait for HSA signal on CPU
uint system_scope_signal_ : 1; //!< HSA signal is visibile to the entire system
uint fgs_kernel_arg_ : 1; //!< Use fine grain kernel arg segment
uint coop_sync_ : 1; //!< grid and multi-grid sync for gfx940+
uint barrier_value_packet_ : 1; //!< Barrier value packet functionality
uint reserved_ : 20;
uint reserved_ : 21;
};
uint value_;
};
+1 -1
Fájl megtekintése
@@ -3284,7 +3284,7 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
// Add a dependency into the device queue on the current queue
queue->Barriers().AddExternalSignal(Barriers().GetLastSignal());
if (!dev().settings().coop_sync_) {
if (dev().settings().gwsInitSupported_ == true) {
uint32_t workgroups = vcmd.numWorkgroups();
static_cast<KernelBlitManager&>(queue->blitMgr()).RunGwsInit(workgroups - 1);
}