Fix sample module_api_global for hip-clang (#1201)
module_api_global relies on an HCC-only feature that allows host code to write directly to device variables. This feature does not exist in CUDA or hip-clang, so the sample does not work with either of them. This patch fixes the sample by using standard features of CUDA and hip-clang: the host fills a host-side copy and transfers it to the device global with hipMemcpyHtoD. The fixed sample works in HCC, CUDA and hip-clang.
This commit is contained in:
@@ -31,7 +31,6 @@ THE SOFTWARE.
|
||||
#define SIZE LEN * sizeof(float)
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
float myDeviceGlobalArray[16];
|
||||
#define HIP_CHECK(cmd) \
|
||||
{ \
|
||||
hipError_t status = cmd; \
|
||||
@@ -71,14 +70,17 @@ int main() {
|
||||
float* deviceGlobal;
|
||||
size_t deviceGlobalSize;
|
||||
HIP_CHECK(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
|
||||
*deviceGlobal = 42.0;
|
||||
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
|
||||
|
||||
#define ARRAY_SIZE 16
|
||||
|
||||
float myDeviceGlobalArray_h[ARRAY_SIZE];
|
||||
float *myDeviceGlobalArray;
|
||||
size_t myDeviceGlobalArraySize;
|
||||
HIP_CHECK(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
|
||||
for (int i = 0; i < ARRAY_SIZE; i++) {
|
||||
myDeviceGlobalArray_h[i] = i * 1000.0f;
|
||||
myDeviceGlobalArray[i] = i * 1000.0f;
|
||||
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
|
||||
}
|
||||
|
||||
struct {
|
||||
|
||||
@@ -25,8 +25,7 @@ THE SOFTWARE.
|
||||
#define ARRAY_SIZE (16)
|
||||
|
||||
__device__ float myDeviceGlobal;
|
||||
extern float myDeviceGlobalArray[16];
|
||||
;
|
||||
__device__ float myDeviceGlobalArray[16];
|
||||
|
||||
extern "C" __global__ void hello_world(const float* a, float* b) {
|
||||
int tx = hipThreadIdx_x;
|
||||
|
||||
Reference in new issue
Block a user