SWDEV-419688 - Do not run GWS init kernel for targets > gfx12 and MI300.
Change-Id: I8e7441268978be71ab8a5a33e7f8bcf69660e500 (cherry picked from commit 36d37ef614909c0f215512aac0c133408d787080)
This commit is contained in:
committed by
Sourabh Betigeri
szülő
6398f604b0
commit
6a8bc3c718
@@ -118,6 +118,23 @@ const char* HipExtraSourceCode = BLIT_KERNELS(
|
||||
}
|
||||
);
|
||||
|
||||
const char* HipExtraSourceCodeNoGWS = BLIT_KERNELS(
|
||||
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
ulong value, ulong sizeBytes) {
|
||||
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
ulong value, ulong flags, ulong mask) {
|
||||
__amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks,
|
||||
uint heap_size, uint number_of_initial_blocks) {
|
||||
__ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks);
|
||||
}
|
||||
);
|
||||
|
||||
const char* BlitImageSourceCode = BLIT_KERNELS(
|
||||
// Extern
|
||||
extern void __amd_fillImage(__write_only image2d_array_t, float4, int4, uint4, int4, int4,
|
||||
|
||||
@@ -659,7 +659,8 @@ class Settings : public amd::HeapObject {
|
||||
uint enableCoopMultiDeviceGroups_ : 1; //!< Enable cooperative groups multi device
|
||||
uint fenceScopeAgent_ : 1; //!< Enable fence scope agent in AQL dispatch packet
|
||||
uint rocr_backend_ : 1; //!< Device uses ROCr backend for submissions
|
||||
uint reserved_ : 14;
|
||||
uint gwsInitSupported_:1; //!< Check if GWS is supported on this machine.
|
||||
uint reserved_ : 10;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -2785,6 +2785,11 @@ bool KernelBlitManager::RunGwsInit(
|
||||
uint32_t value) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
|
||||
if (dev().settings().gwsInitSupported_ == false) {
|
||||
LogError("GWS Init is not supported on this target");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t globalWorkOffset[1] = { 0 };
|
||||
size_t globalWorkSize[1] = { 1 };
|
||||
size_t localWorkSize[1] = { 1 };
|
||||
|
||||
@@ -96,6 +96,7 @@ bool getValueFromIsaMeta(amd_comgr_metadata_node_t& isaMeta, const char* key,
|
||||
|
||||
namespace device {
|
||||
extern const char* HipExtraSourceCode;
|
||||
extern const char* HipExtraSourceCodeNoGWS;
|
||||
} // namespace device
|
||||
|
||||
namespace roc {
|
||||
@@ -847,7 +848,11 @@ bool Device::createBlitProgram() {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
if (settings().useLightning_) {
|
||||
if (amd::IS_HIP) {
|
||||
extraKernel = device::HipExtraSourceCode;
|
||||
if (settings().gwsInitSupported_) {
|
||||
extraKernel = device::HipExtraSourceCode;
|
||||
} else {
|
||||
extraKernel = device::HipExtraSourceCodeNoGWS;
|
||||
}
|
||||
} else {
|
||||
extraKernel = SchedulerSourceCode;
|
||||
}
|
||||
@@ -1752,6 +1757,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
info_.sgprsPerSimd_ =
|
||||
std::numeric_limits<uint32_t>::max(); // gfx10+ does not share SGPRs between waves
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -92,9 +92,9 @@ Settings::Settings() {
|
||||
|
||||
// Use coarse grain system memory for kernel arguments by default (to keep GPU cache)
|
||||
fgs_kernel_arg_ = false;
|
||||
// by default for asics < gfx940 old single grid sync path is followed
|
||||
coop_sync_ = false;
|
||||
barrier_value_packet_ = false;
|
||||
|
||||
gwsInitSupported_ = true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -173,13 +173,12 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor
|
||||
enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE;
|
||||
}
|
||||
|
||||
// No GWS init kernel necessary for these archs
|
||||
if ((gfxipMajor == 9 && gfxipMinor == 4) || gfxipMajor >= 11) {
|
||||
coop_sync_ = true;
|
||||
}
|
||||
|
||||
lcWavefrontSize64_ = !enableWave32Mode_;
|
||||
|
||||
if (gfxipMajor > 10 || (gfxipMajor == 9 && gfxipMinor == 4)) {
|
||||
gwsInitSupported_ = false;
|
||||
}
|
||||
|
||||
// Override current device settings
|
||||
override();
|
||||
|
||||
|
||||
@@ -51,9 +51,8 @@ class Settings : public device::Settings {
|
||||
uint cpu_wait_for_signal_ : 1; //!< Wait for HSA signal on CPU
|
||||
uint system_scope_signal_ : 1; //!< HSA signal is visibile to the entire system
|
||||
uint fgs_kernel_arg_ : 1; //!< Use fine grain kernel arg segment
|
||||
uint coop_sync_ : 1; //!< grid and multi-grid sync for gfx940+
|
||||
uint barrier_value_packet_ : 1; //!< Barrier value packet functionality
|
||||
uint reserved_ : 20;
|
||||
uint reserved_ : 21;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -3284,7 +3284,7 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
|
||||
// Add a dependency into the device queue on the current queue
|
||||
queue->Barriers().AddExternalSignal(Barriers().GetLastSignal());
|
||||
|
||||
if (!dev().settings().coop_sync_) {
|
||||
if (dev().settings().gwsInitSupported_ == true) {
|
||||
uint32_t workgroups = vcmd.numWorkgroups();
|
||||
static_cast<KernelBlitManager&>(queue->blitMgr()).RunGwsInit(workgroups - 1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user