From 71d346bb94f22fb813ea59dc2ab6ad2fa8494f68 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 4 Jun 2015 14:14:26 -0400
Subject: [PATCH] P4 to Git Change 1157942 by rayxiao@alit_opencl_rayxiao on
2015/06/04 14:00:57
EPR #396242 - Update to HCtoDCmapping: Adding guards for HCtoDCmapping in mapping parameters from LLVM to MSVC. New struct packing rule for doubles in Windows added. Use dc_alignment and hc_alignment to track parameter alignment on device and host compilers respectively.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#66 edit
[ROCm/clr commit: 51d7e63f119c32d63b3c96a8a604b3132026a84f]
---
.../rocclr/runtime/device/cpu/cpumapping.cpp | 121 ++++++++++--------
.../rocclr/runtime/device/cpu/cpumapping.hpp | 11 +-
.../rocclr/runtime/device/cpu/cpuprogram.cpp | 24 +++-
3 files changed, 92 insertions(+), 64 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/cpu/cpumapping.cpp b/projects/clr/rocclr/runtime/device/cpu/cpumapping.cpp
index 8b21cc4276..2d86272d43 100644
--- a/projects/clr/rocclr/runtime/device/cpu/cpumapping.cpp
+++ b/projects/clr/rocclr/runtime/device/cpu/cpumapping.cpp
@@ -30,7 +30,8 @@ namespace cpu {
hc_size = 0;
dc_offset = 0;
dc_size = 0;
- map_alignment = level_alignment;
+ hc_alignment = level_alignment;
+ dc_alignment = level_alignment;
internal_field_map = NULL;
next_field_map = NULL;
return;
@@ -91,7 +92,7 @@ namespace cpu {
return size;
}
- size_t HCtoDCmap::getHostScalarAlignment(const clk_value_type_t type) const
+ size_t HCtoDCmap::getScalarAlignment(const clk_value_type_t type, bool isHost) const
{
size_t align = 0;
switch (type) {
@@ -109,10 +110,18 @@ namespace cpu {
align = sizeof(uint32_t);
break;
case T_LONG:
- align = LP64_SWITCH(4, 8);
+ #if defined(_WIN32)
+ align = 8;
+ #else
+ align = isHost? 8 : LP64_SWITCH(4, 8);
+ #endif
break;
case T_DOUBLE:
+ #if defined(_WIN32)
+ align = 8;
+ #else
align = LP64_SWITCH(4, 8);
+ #endif
break;
case T_CHAR8:
case T_SHORT4: case T_INT2: case T_FLOAT2:
@@ -142,24 +151,28 @@ namespace cpu {
// Align up arguments within each map, return the size of current map parameter
// Input current alignment of the parameter, size of outer struct if it exists
- void HCtoDCmap::align_map(unsigned alignment, unsigned &outer_hc_size, unsigned &outer_dc_size, int &inStruct)
+ void HCtoDCmap::align_map(unsigned outer_hc_alignment, unsigned outer_dc_alignment, unsigned &outer_hc_size, unsigned &outer_dc_size, int &inStruct)
{
unsigned map_param_size = 0;
if (internal_field_map != NULL) {
hc_size = 0; //Recalculate size to account for internal offsets
inStruct++;
- internal_field_map->align_map(map_alignment, hc_size, dc_size, inStruct); // align internal struct, might alter size of this struct
+ internal_field_map->align_map(hc_alignment, dc_alignment, hc_size, dc_size, inStruct); // align internal struct, might alter size of this struct
+ if (hc_alignment != 1 && hc_size%hc_alignment)
+ hc_size = max(hc_size, hc_size - (hc_size%hc_alignment) + hc_alignment);
+ if (dc_alignment != 1 && dc_size%dc_alignment)
+ dc_size = max(dc_size, dc_size - (dc_size%dc_alignment) + dc_alignment);
}
// Use map_param_size to store current parameter size after adjusting alignment
- if (alignment != 1 && hc_size % alignment != 0) {
- map_param_size = max(alignment, hc_size - (hc_size%alignment) + alignment);
+ if (hc_alignment != 1 && hc_size % hc_alignment != 0) {
+ map_param_size = max(hc_alignment, hc_size - (hc_size%hc_alignment) + hc_alignment);
}
else {
- map_param_size = max(alignment, hc_size);
+ map_param_size = max(hc_alignment, hc_size);
}
if (next_field_map != NULL) {
next_field_map->hc_offset = this->next_offset(hc_offset, map_param_size, inStruct);
- next_field_map->align_map(alignment, outer_hc_size, outer_dc_size, inStruct);
+ next_field_map->align_map(outer_hc_alignment, outer_dc_alignment, outer_hc_size, outer_dc_size, inStruct);
// Reset parameter size for char padding
if (next_field_map->type == T_CHAR)
map_param_size = 1;
@@ -179,7 +192,7 @@ namespace cpu {
// Return current size of map, calculate internal maps and process next args if in struct.
// Alignment: alignment flag for members in case of structs, alignment of scalar otherwise.
- int HCtoDCmap::compute_map(const clk_parameter_descriptor_t* desc, unsigned int &alignment, unsigned int init_offset, int& inStruct, int& index_out)
+ int HCtoDCmap::compute_map(const clk_parameter_descriptor_t* desc, unsigned int &outer_hc_alignment, unsigned int &outer_dc_alignment, unsigned int init_offset, int& inStruct, int& index_out)
{
unsigned internal_index;
internal_index = index_out;
@@ -195,11 +208,11 @@ namespace cpu {
index_out++;
internal_index = index_out;
internal_field_map = new HCtoDCmap(desc, 0, internal_index, init_offset);
- hc_size = internal_field_map->compute_map(desc, map_alignment, next_offset, inStruct, index_out);
- map_alignment = max(map_alignment, internal_field_map->map_alignment); // Adjust alignment to biggest member alignment
+ hc_size = internal_field_map->compute_map(desc, hc_alignment, dc_alignment, next_offset, inStruct, index_out);
+ hc_alignment = max(hc_alignment, internal_field_map->hc_alignment); // Adjust alignment to biggest member alignment
struct_size = hc_size;
internal_index = index_out;
- alignment = max(alignment, map_alignment);
+ outer_hc_alignment = max(outer_hc_alignment, hc_alignment);
if (inStruct > 0) {
if (desc[index_out+1].type != T_VOID) {
//Still inside struct and not done
@@ -207,8 +220,8 @@ namespace cpu {
internal_index = index_out;
next_field_map = new HCtoDCmap(desc, 0, internal_index, next_offset);
struct_size = hc_size;
- struct_size += next_field_map->compute_map(desc, alignment, next_offset, inStruct, index_out);
- next_offset = max(next_field_map->hc_offset+next_field_map->hc_size, next_field_map->hc_offset+alignment);
+ struct_size += next_field_map->compute_map(desc, outer_hc_alignment, outer_dc_alignment, next_offset, inStruct, index_out);
+ next_offset = max(next_field_map->hc_offset+next_field_map->hc_size, next_field_map->hc_offset+hc_alignment);
// running count of strucdc_size = hc_size + size of next member
return struct_size;
}
@@ -227,19 +240,20 @@ namespace cpu {
hc_offset = init_offset;
hc_size = getHostScalarParamSize(desc[internal_index].type);
dc_size = hc_size;
- map_alignment = getHostScalarAlignment(desc[internal_index].type);
- alignment = max(alignment, map_alignment); //Adjust alignment of upper level struct if necessary, upper level alignment = max alignment of members
- if (desc[internal_index].type == T_LONG)
- alignment = max(alignment, (unsigned int)8); //Set struct alignment to 8 on outside if containing struct member of long
+ hc_alignment = getScalarAlignment(desc[internal_index].type, true);
+ dc_alignment = getScalarAlignment(desc[internal_index].type, false);
+ outer_hc_alignment = max(outer_hc_alignment, hc_alignment); //Adjust alignment of upper level struct if necessary, upper level alignment = max alignment of members
+ outer_dc_alignment = max(outer_dc_alignment, dc_alignment); //Adjust alignment of upper level struct if necessary, upper level alignment = max alignment of members
if (inStruct > 0) {
if (desc[index_out+1].type != T_VOID) {
//Still inside struct and not done
index_out++;
- next_field_map = new HCtoDCmap(desc, alignment, internal_index, next_offset);
+ next_field_map = new HCtoDCmap(desc, outer_hc_alignment, internal_index, next_offset);
struct_size = hc_size;
- struct_size += next_field_map->compute_map(desc, alignment, next_offset, inStruct, index_out);
- next_offset = hc_offset+alignment;
- alignment = max(alignment, next_field_map->map_alignment);
+ struct_size += next_field_map->compute_map(desc, outer_hc_alignment, outer_dc_alignment, next_offset, inStruct, index_out);
+ next_offset = hc_offset+hc_alignment;
+ outer_hc_alignment = max(outer_hc_alignment, next_field_map->hc_alignment);
+ outer_dc_alignment = max(outer_dc_alignment, next_field_map->dc_alignment);
// running count of strucdc_size = hc_size + size of next member
return struct_size;
}
@@ -268,35 +282,18 @@ namespace cpu {
next_field_map->dc_offset = dc_offset + dc_size;
next_offset = current_offset + hc_size;
}
- //
else {
- if (this->next_field_map->type == T_LONG) {
- if (dc_size % 4 != 0) {
- this->next_field_map->dc_offset = dc_offset + dc_size - (dc_size % 4) + 4; // T_LONG aligned by 4 in target
- }
- else {
- this->next_field_map->dc_offset = dc_offset + dc_size; // T_LONG aligned by 4 in target
- }
- if (dc_size % 8 != 0) {
- next_offset = current_offset + dc_size - (dc_size % 8) + 8; //aligned by 8 in source
- }
- else {
- next_offset = current_offset + dc_size; //aligned by 8 in source
- }
+ if ((dc_offset + dc_size) % next_field_map->dc_alignment != 0) {
+ this->next_field_map->dc_offset = dc_offset + dc_size - (dc_size % next_field_map->dc_alignment) + next_field_map->dc_alignment;
}
else {
- if ((dc_offset + dc_size) % next_field_map->map_alignment != 0) {
- this->next_field_map->dc_offset = dc_offset + dc_size - (dc_size % next_field_map->map_alignment) + next_field_map->map_alignment;
- }
- else {
- this->next_field_map->dc_offset = dc_offset + max(dc_size, next_field_map->map_alignment);
- }
- if ((hc_offset + hc_size) % next_field_map->map_alignment != 0) {
- next_offset = hc_offset + hc_size - (hc_size % next_field_map->map_alignment) + next_field_map->map_alignment;
- }
- else {
- next_offset = hc_offset + max(next_field_map->map_alignment, map_param_size);
- }
+ this->next_field_map->dc_offset = dc_offset + max(dc_size, next_field_map->dc_alignment);
+ }
+ if ((hc_offset + hc_size) % next_field_map->hc_alignment != 0) {
+ next_offset = hc_offset + hc_size - (hc_size % next_field_map->hc_alignment) + next_field_map->hc_alignment;
+ }
+ else {
+ next_offset = hc_offset + max(next_field_map->hc_alignment, map_param_size);
}
}
return next_offset;
@@ -304,13 +301,31 @@ namespace cpu {
}
// Copy memory according to mapping
- unsigned int HCtoDCmap::copy_params(void *dst, const void *src, unsigned int &arg_offset, int& error_code, int &inStruct) const
+ unsigned int HCtoDCmap::copy_params(void *dst, const void *src, unsigned int arg_offset, int& error_code, int &inStruct) const
{
unsigned int padding = 0;
// Pad offset to be aligned by 8 if parameter is double, not as struct field
- if ((arg_offset+dc_offset) % 8 != 0 && (type == T_DOUBLE) && inStruct == 0)
- padding = map_alignment-((arg_offset+dc_offset)%map_alignment);
+ if ((arg_offset) % 8 != 0 && (type == T_DOUBLE) && inStruct == 0)
+ padding = hc_alignment-((arg_offset+dc_offset)%hc_alignment);
+ #if defined(_WIN32)
+ // In windows, double is aligned by 8, add padding to struct if it contains double
+ if ((arg_offset+dc_offset) % 8 != 0 && hc_alignment == 8)
+ padding = hc_alignment-((arg_offset+dc_offset)%hc_alignment);
+ #endif
::memcpy(reinterpret_cast(reinterpret_cast(dst)+padding), src, hc_size);
+ #if defined(_WIN32)
+ if (internal_field_map != NULL) {
+ inStruct++;
+ void *internal_dst = reinterpret_cast(reinterpret_cast(dst)+padding);
+ internal_field_map->copy_params(internal_dst, src, arg_offset+padding, error_code, inStruct);
+ inStruct--;
+ }
+ if (next_field_map != NULL) {
+ void *next_dst = reinterpret_cast(reinterpret_cast(dst)+next_field_map->dc_offset); // Next field starts with padding
+ const void *next_src = reinterpret_cast(reinterpret_cast(src)+next_field_map->hc_offset);
+ next_field_map->copy_params(next_dst, next_src, arg_offset+next_field_map->dc_offset, error_code, inStruct);
+ }
+ #else
if (internal_field_map != NULL) {
inStruct++;
internal_field_map->copy_params(dst, src, arg_offset, error_code, inStruct);
@@ -321,6 +336,8 @@ namespace cpu {
const void *next_src = reinterpret_cast(reinterpret_cast(src)+next_field_map->hc_offset);
next_field_map->copy_params(next_dst, next_src, arg_offset, error_code, inStruct);
}
+ #endif
return padding;
}
+
} //namespace cpu
\ No newline at end of file
diff --git a/projects/clr/rocclr/runtime/device/cpu/cpumapping.hpp b/projects/clr/rocclr/runtime/device/cpu/cpumapping.hpp
index 105b76bde7..263d22a63e 100644
--- a/projects/clr/rocclr/runtime/device/cpu/cpumapping.hpp
+++ b/projects/clr/rocclr/runtime/device/cpu/cpumapping.hpp
@@ -18,19 +18,20 @@ class HCtoDCmap
public:
unsigned int hc_offset, hc_size; // Offset and size of this parameter in host compiler
unsigned int dc_offset, dc_size; // Offset and size of this parameter in device compiler
- unsigned int map_alignment; // Alignment of parameter in host compiler
+ unsigned int hc_alignment; // Alignment of parameter in host compiler
+ unsigned int dc_alignment; // Alignment of parameter in device compiler
clk_value_type_t type; // Type of parameter
HCtoDCmap *internal_field_map; // Pointer to internal mapping when current parameter is of type T_STRUCT
HCtoDCmap *next_field_map; // Pointer to next struct field when current parameter is a struct member
HCtoDCmap(const clk_parameter_descriptor_t*, unsigned int, unsigned int, unsigned int);
virtual ~HCtoDCmap();
- int compute_map(const clk_parameter_descriptor_t*, unsigned int &, unsigned int, int&, int&);
+ int compute_map(const clk_parameter_descriptor_t*, unsigned int &, unsigned int &, unsigned int, int&, int&);
unsigned next_offset(unsigned, unsigned &, int &);
size_t getHostScalarParamSize(const clk_value_type_t) const;
- size_t getHostScalarAlignment(const clk_value_type_t) const;
- void align_map(unsigned, unsigned&, unsigned&, int&);
- unsigned int copy_params(void *, const void *, unsigned int&, int&, int&) const;
+ size_t getScalarAlignment(const clk_value_type_t, bool) const;
+ void align_map(unsigned, unsigned, unsigned&, unsigned&, int&);
+ unsigned int copy_params(void *, const void *, unsigned int, int&, int&) const;
private:
};
diff --git a/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp b/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp
index 769e414a44..0fc539d6d0 100644
--- a/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/cpu/cpuprogram.cpp
@@ -182,16 +182,22 @@ getParamSizeImpl(bool cpuLayer, const clk_parameter_descriptor_t* desc,
size_t elementSize =
getParamSizeImpl(cpuLayer, desc, index, qualifier,
&elementAlignment, index_out);
- if (desc[index].type == T_LONG)
- structAlignment = cpuLayer? LP64_SWITCH(4, 8) : 8;
- else
- structAlignment = std::max(maxAlignment, elementAlignment);
+ #if defined(_WIN32)
+ maxAlignment = std::max(maxAlignment, elementAlignment);
+ #else
+ // In Linux, the alignment of long field is 4 for GCC,
+ // but it is 8 on LLVM side
+ if (desc[index].type == T_LONG)
+ structAlignment = cpuLayer? LP64_SWITCH(4, 8) : 8;
+ else
+ structAlignment = std::max(maxAlignment, elementAlignment);
+ maxAlignment = std::max(maxAlignment, structAlignment);
+ #endif
index = *index_out;
structSize =
amd::alignUp(structSize,
std::min(elementAlignment, size_t(16))) +
elementSize;
- maxAlignment = std::max(maxAlignment, structAlignment);
}
*index_out = index + 1;
*alignment = maxAlignment;
@@ -199,7 +205,11 @@ getParamSizeImpl(bool cpuLayer, const clk_parameter_descriptor_t* desc,
} else {
size = getScalarParamSize(cpuLayer, desc[index].type, qualifier);
if (desc[index].type == T_DOUBLE) {
+ #if defined(_WIN32)
+ *alignment = 8;
+ #else
*alignment = LP64_SWITCH(4, 8);
+ #endif
} else if (desc[index].type == T_LONG) {
*alignment = 8;
} else {
@@ -352,8 +362,8 @@ setKernelInfoCallback(std::string symbol, const void* value, void* data)
int inStruct = 0;
int end_index = 0;
HCtoDCmap *map_p = new HCtoDCmap(desc, align, 0, init_offset);
- map_p->dc_size = map_p->compute_map(desc, map_p->map_alignment, init_offset, inStruct, end_index);
- map_p->align_map(map_p->map_alignment, map_p->hc_size, map_p->dc_size, inStruct);
+ map_p->dc_size = map_p->compute_map(desc, map_p->hc_alignment, map_p->dc_alignment, init_offset, inStruct, end_index);
+ map_p->align_map(map_p->hc_alignment, map_p->dc_alignment, map_p->hc_size, map_p->dc_size, inStruct);
if (CPU_USE_ALIGNMENT_MAP == 0) {
kernel->addHCtoDCmap(map_p);
if (map_p->internal_field_map != NULL) {