Doc update for Serialization.
Describe workaround for partial specialization
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
# HIP Bugs
|
||||
|
||||
# HIP Bugs
|
||||
<!-- toc -->
|
||||
|
||||
- [Errors related to undefined reference to `__hcLaunchKernel__***__grid_launch_parm**`](#errors-related-to-undefined-reference-to-__hclaunchkernel____grid_launch_parm)
|
||||
@@ -41,60 +40,86 @@ For example, `Foo` in the code snippets below contains an array-typed member var
|
||||
|
||||
```
|
||||
struct Foo {
|
||||
float _data;
|
||||
// table is an array, which makes foo
|
||||
int table[3];
|
||||
};
|
||||
```
|
||||
|
||||
A workaround is to provide a custom serializer on the CPU side that appends the contents of the array as kernel arguments:
|
||||
A workaround is to provide a custom serializer on the host side, which appends the contents of the array as kernel arguments, and a custom deserializer on the device side to reconstruct the array inside the GPU kernels.
|
||||
The deserializer cannot be a function template, and it must have as many scalar-typed parameters as the array-typed member variable has elements. For example:
|
||||
|
||||
```
|
||||
|
||||
struct Foo {
|
||||
int table[3];
|
||||
float _data;
|
||||
int _table[3];
|
||||
|
||||
|
||||
// user-provided CPU serializer
|
||||
// must append the contents of the array member as kernel arguments
|
||||
#ifdef __HCC__
|
||||
// user-provided CPU serializer
|
||||
// Append the contents of the array member as kernel arguments
|
||||
__attribute__((annotate("serialize")))
|
||||
void __cxxamp_serialize(Kalmar::Serialize &s) const {
|
||||
s.Append(sizeof(float), &_data);
|
||||
for (int i = 0; i < 3; ++i)
|
||||
s.Append(sizeof(int), &table[i]);
|
||||
s.Append(sizeof(int), &_table[i]);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
```
|
||||
|
||||
Then, provide a custom deserializer on the GPU side to help reconstruct the array within GPU kernels. Notice that the deserializer cannot be a function template, and it must have as many scalar-typed parameters as the array-typed member variable has elements. For example:
|
||||
|
||||
```
|
||||
struct Foo {
|
||||
int table[3];
|
||||
|
||||
// user-provided GPU deserializer
|
||||
// table has 3 int elements, so deserializer must have 3 int parameters.
|
||||
#ifdef __HCC__
|
||||
__attribute__((annotate("user_deserialize")))
|
||||
Foo(int x0, int x1, int x2) [[cpu]][[hc]] {
|
||||
table[0] = x0;
|
||||
table[1] = x1;
|
||||
table[2] = x2;
|
||||
Foo(float d, int x0, int x1, int x2) [[cpu]][[hc]] {
|
||||
_data = d;
|
||||
_table[0] = x0;
|
||||
_table[1] = x1;
|
||||
_table[2] = x2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __HCC__
|
||||
__attribute__((annotate("serialize")))
|
||||
void __cxxamp_serialize(Kalmar::Serialize &s) const {
|
||||
s.Append(sizeof(int), &table[0]);
|
||||
s.Append(sizeof(int), &table[1]);
|
||||
s.Append(sizeof(int), &table[2]);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
```
|
||||
|
||||
|
||||
Rather than create serializer functions, another workaround is to pass the member fields from the structure as simple data types.
|
||||
Note a class or struct can contain only one "user_deserialize" constructor.
|
||||
For types that contain arrays whose size depends on a template parameter, you can use partial template specialization to implement one constructor per specialization.
|
||||
However, an easier approach may be to create a single "user_deserialize" constructor which processes the maximum supported dimension.
|
||||
This will take more memory in the structure and also require additional kernel arguments, but this may have little performance impact and the conversion is easier than partial template specialization. An example:
|
||||
|
||||
```
|
||||
#define MAX_Dim 4
|
||||
template<typename T, int Dim> struct MyArray {
|
||||
|
||||
T* dataPtr_;
|
||||
//int size_[Dim]; // Original code with template-sized Dims
|
||||
int size_[MAX_Dim]; // Workaround code - allocate an array big enough for all dims so one serializer works.
|
||||
|
||||
|
||||
...
|
||||
|
||||
#ifdef __HCC__
|
||||
__attribute__((annotate("serialize")))
|
||||
void __cxxamp_serialize(Kalmar::Serialize &s) const {
|
||||
s.Append(sizeof(dataPtr_), &dataPtr_);
|
||||
for (int i=0; i<MAX_Dim; i++) {
|
||||
s.Append(sizeof(size_[0]), &size_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__attribute__((annotate("user_deserialize")))
|
||||
MyArray(T* data, int size0, int size1, int size2, int size3) [[cpu]][[hc]] {
|
||||
|
||||
dataPtr_ = data;
|
||||
size_[0] = size0;
|
||||
size_[1] = size1;
|
||||
size_[2] = size2;
|
||||
size_[3] = size3;
|
||||
}
|
||||
#endif
|
||||
```
|
||||
|
||||
|
||||
### HIP is more restrictive in enforcing restrictions
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user