SWDEV-406687 - combining rocblitcl and palblitcl blit kernel defs

Change-Id: Ia312d73584a03491e8d574f424295b64df6de174
This commit is contained in:
ajay
2023-06-19 18:02:48 +00:00
zatwierdzone przez Ajay GunaShekar
rodzic e9750de215
commit d6946ffcbc
5 zmienionych plików z 28 dodań i 87 usunięć
+26
Wyświetl plik
@@ -39,6 +39,14 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
extern void __amd_copyBufferRectAligned(__global uint*, __global uint*, ulong4, ulong4, ulong4);
extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong);
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
extern void __ockl_gws_init(uint nwm1, uint rid);
// Implementation
__kernel void __amd_rocclr_fillBufferAligned(__global uchar* bufUChar,
__global ushort* bufUShort,
@@ -88,6 +96,24 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
__amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size);
}
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong sizeBytes) {
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
}
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong flags, ulong mask) {
__amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask);
}
__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks,
uint heap_size, uint number_of_initial_blocks) {
__ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks);
}
__kernel void __amd_rocclr_gwsInit(uint value) {
__ockl_gws_init(value, 0);
}
);
const char* BlitImageSourceCode = BLIT_KERNELS(
-35
Wyświetl plik
@@ -22,32 +22,6 @@ namespace pal {
#define BLIT_KERNEL(...) #__VA_ARGS__
const char* palBlitLinearSourceCode = BLIT_KERNEL(
\n
extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong);
\n
extern void __amd_streamOpsWait(__global uint*,__global ulong*, ulong, ulong, ulong);
\n
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
\n
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong sizeBytes) {
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
}
\n
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong flags, ulong mask) {
__amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask);
}
\n
__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks,
uint heap_size, uint number_of_initial_blocks) {
__ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks);
}
\n);
const char* SchedulerSourceCode = BLIT_KERNEL(
\n
extern void __amd_scheduler(__global void*, __global void*, uint);
@@ -67,13 +41,4 @@ extern void __amd_scheduler_pal(__global void*, __global void*, uint);
}
\n);
const char* GwsInitSourceCode = BLIT_KERNEL(
\n
extern void __ockl_gws_init(uint nwm1, uint rid);
\n
__kernel void __amd_rocclr_gwsInit(uint value) {
__ockl_gws_init(value, 0);
}
\n);
} // namespace pal
+1 -8
Wyświetl plik
@@ -835,8 +835,6 @@ Device::~Device() {
extern const char* SchedulerSourceCode;
extern const char* SchedulerSourceCode20;
extern const char* GwsInitSourceCode;
extern const char* palBlitLinearSourceCode;
Pal::IDevice* gDeviceList[Pal::MaxDevices] = {};
uint32_t gStartDevice = 0;
@@ -2506,12 +2504,7 @@ bool Device::createBlitProgram() {
// Delayed compilation due to brig_loader memory allocation
std::string extraBlits;
std::string ocl20;
if (amd::IS_HIP) {
extraBlits = palBlitLinearSourceCode;
if (info().cooperativeGroups_) {
extraBlits.append(GwsInitSourceCode);
}
} else {
if (!amd::IS_HIP) {
if (settings().oclVersion_ >= OpenCL20) {
extraBlits = iDev()->GetDispatchKernelSource();
if (settings().useLightning_) {
-35
Wyświetl plik
@@ -22,32 +22,6 @@ namespace roc {
#define BLIT_KERNEL(...) #__VA_ARGS__
const char* rocBlitLinearSourceCode = BLIT_KERNEL(
// Extern
extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong);
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
// Implementation
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong sizeBytes) {
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
}
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
ulong value, ulong flags, ulong mask) {
__amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask);
}
__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks,
uint heap_size, uint number_of_initial_blocks) {
__ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks);
}
);
const char* SchedulerSourceCode = BLIT_KERNEL(
extern void __amd_scheduler_rocm(__global void*);
@@ -57,13 +31,4 @@ const char* SchedulerSourceCode = BLIT_KERNEL(
}
);
const char* GwsInitSourceCode = BLIT_KERNEL(
extern void __ockl_gws_init(uint nwm1, uint rid);
__kernel void __amd_rocclr_gwsInit(uint value) {
__ockl_gws_init(value, 0);
}
);
} // namespace roc
+1 -9
Wyświetl plik
@@ -546,8 +546,6 @@ bool Device::init() {
}
extern const char* SchedulerSourceCode;
extern const char* GwsInitSourceCode;
extern const char* rocBlitLinearSourceCode;
void Device::tearDown() {
NullDevice::tearDown();
@@ -845,13 +843,7 @@ bool Device::createBlitProgram() {
#if defined(USE_COMGR_LIBRARY)
if (settings().useLightning_) {
if (amd::IS_HIP) {
extraKernel = rocBlitLinearSourceCode;
if (info().cooperativeGroups_) {
extraKernel.append(GwsInitSourceCode);
}
}
else {
if (!amd::IS_HIP) {
extraKernel = SchedulerSourceCode;
}