Fix sample module_api_global for hip-clang (#1201)

module_api_global relies on an HCC-only feature that allows host code
to write directly to device variables. This feature does not exist in
CUDA or hip-clang, so the sample does not work with either of them.

This patch fixes the sample by using standard features of CUDA and
hip-clang. The fixed sample works in HCC, CUDA and hip-clang.
This commit is contained in:
Maneesh Gupta
2019-07-03 08:52:19 +00:00
committed by GitHub
parents 3b0faf950b 688ce62b49
commit 5cd72efff0
2 changed files with 6 additions and 5 deletions
@@ -31,7 +31,6 @@ THE SOFTWARE.
#define SIZE LEN * sizeof(float)
#define fileName "vcpy_kernel.code"
float myDeviceGlobalArray[16];
#define HIP_CHECK(cmd) \
{ \
hipError_t status = cmd; \
@@ -71,14 +70,17 @@ int main() {
float* deviceGlobal;
size_t deviceGlobalSize;
HIP_CHECK(hipModuleGetGlobal((void**)&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal"));
*deviceGlobal = 42.0;
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize));
#define ARRAY_SIZE 16
float myDeviceGlobalArray_h[ARRAY_SIZE];
float *myDeviceGlobalArray;
size_t myDeviceGlobalArraySize;
HIP_CHECK(hipModuleGetGlobal((void**)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray"));
for (int i = 0; i < ARRAY_SIZE; i++) {
myDeviceGlobalArray_h[i] = i * 1000.0f;
myDeviceGlobalArray[i] = i * 1000.0f;
HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize));
}
struct {
@@ -25,8 +25,7 @@ THE SOFTWARE.
#define ARRAY_SIZE (16)
__device__ float myDeviceGlobal;
extern float myDeviceGlobalArray[16];
;
__device__ float myDeviceGlobalArray[16];
extern "C" __global__ void hello_world(const float* a, float* b) {
int tx = hipThreadIdx_x;