From 9f06abc1ed3f1987e30f8d905dc3f89bc59f9aab Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 27 Oct 2015 18:14:07 -0400
Subject: [PATCH] P4 to Git Change 1204512 by ashi1@ashi1_win50 on 2015/10/27
17:04:33
SWDEV-79695 - Check in again after fixing the build issue with the ocltst changes. Merged all changes in compiler lib to legacy-lib.
HSAIL Metadata Workgroup Size Hint and Vec Type Hint added to HSAIL
HSAIL does not handle kernel attributes for work_group_size_hint or vec_type_hint.
Adding these two kernel attributes to metadata requires multiple changes:
HSAIL: Added parsing functions to HSAILParseMetadata.cpp, and similar files
Properly translates to .hsail files as 'wsh' and 'vth'
MDParser: Flex scanner now reads new hsail metadata, 'wsh' and 'vth'
Acl_metadata: added the two new metadata
uint32_t wsh[3]; // Added case: RT_WORK_GROUP_SIZE_HINT
const char *vth; // Added case: RT_VEC_TYPE_HINT
Since vth is a std::string, it had to be serialized and then de-serialized for aclMetadata.
Affected files ...
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/common/v0_8/if_acl.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/MDParser/AMDILMDInterface.h#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/MDParser/AMDILMDParser.l#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/MDParser/AMDILMDTypes.cpp#3 integrate
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/MDParser/lex.yy.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/metadata.cpp#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclEnums.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclStructs.h#4 integrate
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/MDParser/AMDILMDInterface.h#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/MDParser/AMDILMDParser.l#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/MDParser/AMDILMDTypes.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/MDParser/lex.yy.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/metadata.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclEnums.h#26 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#21 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILKernel.h#15 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILKernelManager.cpp#40 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILModuleInfo.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/HSAIL/HSAILParseMetadata.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLEnumCheck.cpp#51 edit
[ROCm/clr commit: d75d0654d4e8bef7e3670767368588f6f4c34f2b]
---
.../lib/backends/common/v0_8/if_acl.cpp | 44 +++++++++++++++++--
.../compiler/lib/include/v0_8/aclEnums.h | 4 +-
.../compiler/lib/include/v0_8/aclStructs.h | 3 ++
3 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/projects/clr/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp b/projects/clr/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp
index 98b080787e..9c11bedaba 100644
--- a/projects/clr/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp
+++ b/projects/clr/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp
@@ -2356,6 +2356,10 @@ void deserializeCLMetadata(const char* ptr, aclMetadata * const md, const size_t
md->deviceName = tmp_ptr;
tmp_ptr += md->deviceNameSize + 1;
+ // de-serialize the vec type hint
+ md->vth = tmp_ptr;
+ tmp_ptr += md->vecTypeHintSize + 1;
+
// de-serailize the arguments
md->args = reinterpret_cast(tmp_ptr);
tmp_ptr += (md->numArgs + 1) * sizeof(aclArgData);
@@ -2825,7 +2829,34 @@ if_aclQueryInfo(aclCompiler *cl,
}
break;
}
- }
+ case RT_WORK_GROUP_SIZE_HINT: {
+ size_t work_group_size_hint_size = sizeof(md->wsh);
+ if (!ptr) {
+ *size = work_group_size_hint_size;
+ success = true;
+ } else if (*size >= work_group_size_hint_size) {
+ memcpy(ptr, md->wsh, work_group_size_hint_size);
+ success = true;
+ }
+ break;
+ }
+ case RT_VEC_TYPE_HINT: {
+ if (!ptr) {
+ *size = md->vecTypeHintSize;
+ success = true;
+ } else if (*size >= md->vecTypeHintSize) {
+ // vecTypeHint is a pointer, which is serialized by serializeMetadata() to NULL
+ // in binary; to get the data deserializeCLMetadata() is needed
+ aclMetadata *deserializedMd = static_cast(alloca(roSize));
+ deserializeCLMetadata(reinterpret_cast(roSec), deserializedMd, roSize);
+ if (deserializedMd->vth && deserializedMd->vecTypeHintSize == md->vecTypeHintSize) {
+ strncpy(reinterpret_cast(ptr), deserializedMd->vth, deserializedMd->vecTypeHintSize);
+ success = true;
+ }
+ }
+ break;
+ }
+ }
return (success) ? ACL_SUCCESS : ACL_ERROR;
}
static unsigned getSize(aclArgDataType data)
@@ -2878,12 +2909,15 @@ if_aclDbgAddArgument(aclCompiler *cl,
aclMetadata *newMD = reinterpret_cast(newMDptr);
memcpy(tmp_ptr, md, md->struct_size
+ (md->kernelNameSize + 1)
- + (md->deviceNameSize + 1));
+ + (md->deviceNameSize + 1)
+ + (md->vecTypeHintSize + 1));
tmp_ptr += md->struct_size;
tmp_ptr += md->kernelNameSize + 1;
tmp_ptr[-1] = '\0';
tmp_ptr += md->deviceNameSize + 1;
tmp_ptr[-1] = '\0';
+ tmp_ptr += md->vecTypeHintSize + 1;
+ tmp_ptr[-1] = '\0';
newMD->args = reinterpret_cast(tmp_ptr);
unsigned cb_offset = 0;
const aclArgData *c_argPtr = reinterpret_cast(
@@ -3018,6 +3052,7 @@ if_aclDbgRemoveArgument(aclCompiler *cl,
ro_ptr += md->struct_size;
ro_ptr += md->kernelNameSize + 1;
ro_ptr += md->deviceNameSize + 1;
+ ro_ptr += md->vecTypeHintSize + 1;
const aclArgData *argPtr = reinterpret_cast(ro_ptr);
const aclArgData *delArg = 0;
for (unsigned x = 0; x < md->numArgs; ++x) {
@@ -3038,12 +3073,15 @@ if_aclDbgRemoveArgument(aclCompiler *cl,
char *tmp_ptr = newMDptr;
memcpy(tmp_ptr, reinterpret_cast(md), md->struct_size
+ (md->kernelNameSize + 1)
- + (md->deviceNameSize + 1));
+ + (md->deviceNameSize + 1)
+ + (md->vecTypeHintSize +1));
tmp_ptr += md->struct_size;
tmp_ptr += md->kernelNameSize + 1;
tmp_ptr[-1] = '\0';
tmp_ptr += md->deviceNameSize + 1;
tmp_ptr[-1] = '\0';
+ tmp_ptr += md->vecTypeHintSize + 1;
+ tmp_ptr[-1] = '\0';
unsigned cb_offset = ((delArg->type == ARG_TYPE_VALUE)
? delArg->arg.value.cbOffset : delArg->arg.pointer.cbOffset);
size_t printf_offset = reinterpret_cast(md->printf)
diff --git a/projects/clr/rocclr/compiler/lib/include/v0_8/aclEnums.h b/projects/clr/rocclr/compiler/lib/include/v0_8/aclEnums.h
index 25264ada80..928cd048fc 100644
--- a/projects/clr/rocclr/compiler/lib/include/v0_8/aclEnums.h
+++ b/projects/clr/rocclr/compiler/lib/include/v0_8/aclEnums.h
@@ -218,7 +218,9 @@ typedef enum _rt_query_types_enum_0_8 {
RT_NUM_KERNEL_HIDDEN_ARGS = 22,
RT_CONTAINS_SPIRV = 23,
RT_WAVES_PER_SIMD_HINT = 24,
- RT_LAST_TYPE = 25
+ RT_WORK_GROUP_SIZE_HINT = 25,
+ RT_VEC_TYPE_HINT = 26,
+ RT_LAST_TYPE = 27
} aclQueryType_0_8;
//! An enumeration for the various GPU capabilities
diff --git a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
index 0e648669b9..42206cb1fd 100644
--- a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
+++ b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
@@ -121,6 +121,9 @@ typedef struct _acl_metadata_0_8 {
uint32_t kernel_index; // RT_KERNEL_INDEX
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
uint32_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT
+ uint32_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT
+ size_t vecTypeHintSize;
+ const char *vth; // RT_VEC_TYPE_HINT
} aclMetadata_0_8;
//! An structure that holds information on the capabilities of the bif device.