From 27480ff5a24db54b61c8a52aed1ad7ae87429af1 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 28 Feb 2020 22:54:00 +0200 Subject: [PATCH 001/132] Annotate `__constant__` --- include/hip/hcc_detail/host_defines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/host_defines.h b/include/hip/hcc_detail/host_defines.h index 11bd577f08..b21946e99f 100644 --- a/include/hip/hcc_detail/host_defines.h +++ b/include/hip/hcc_detail/host_defines.h @@ -60,7 +60,7 @@ THE SOFTWARE. */ // _restrict is supported by the compiler #define __shared__ tile_static -#define __constant__ __attribute__((hc)) +#define __constant__ __attribute__((hc, annotate("__HIP_constant__"))) #elif defined(__clang__) && defined(__HIP__) From 03797ae9869c24690d362520cb7663e069ad6888 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Tue, 3 Mar 2020 13:25:36 -0500 Subject: [PATCH 002/132] [HIP] add hip specific properties for cooperative kernel multi device --- include/hip/hip_runtime_api.h | 16 ++++++++++++++++ include/hip/nvcc_detail/hip_runtime_api.h | 4 ++++ src/hip_device.cpp | 12 ++++++++++++ src/hip_hcc.cpp | 5 +++++ 4 files changed, 37 insertions(+) diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 025688e98c..340b01c99d 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -117,6 +117,14 @@ typedef struct hipDeviceProp_t { int integrated; ///< APU vs dGPU int cooperativeLaunch; ///< HIP device supports cooperative launch int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple + ///devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim;///< HIP device supports cooperative launch on 
multiple + ///devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem;///< HIP device supports cooperative launch on multiple + ///devices with unmatched shared memories int maxTexture1D; ///< Maximum number of elements in 1D images int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements @@ -313,6 +321,14 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeIntegrated, ///< iGPU hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices + hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple + ///devices with unmatched functions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple + ///devices with unmatched grid dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,///< Supports cooperative launch on multiple + ///devices with unmatched block dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem,///< Supports cooperative launch on multiple + ///devices with unmatched shared memories hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 6e0d02d0c0..04f7a429df 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -1134,6 +1134,10 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev p_prop->integrated = cdprop.integrated; p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; + 
p_prop->cooperativeMultiDeviceUnmatchedFunc = 0; + p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0; p_prop->maxTexture1D = cdprop.maxTexture1D; p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0]; diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 1bbdb10bbc..e5797727ae 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -310,6 +310,18 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop->cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop->cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop->cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop->cooperativeMultiDeviceUnmatchedBlockDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop->cooperativeMultiDeviceUnmatchedSharedMem; + break; case hipDeviceAttributeMaxPitch: *pi = prop->memPitch; break; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index c9688408c8..99c63e0338 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -901,6 +901,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { prop->cooperativeLaunch = (prop->gcnArch < 900) ? 0 : 1; prop->cooperativeMultiDeviceLaunch = (prop->gcnArch < 900) ? 
0 : 1; + prop->cooperativeMultiDeviceUnmatchedFunc = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedGridDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedBlockDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedSharedMem = prop->cooperativeMultiDeviceLaunch; + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, &prop->maxTexture1D); DeviceErrorCheck(err); From 4a40010ac6c48ed5660dba83d9d70ccdfc6fb876 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 4 Mar 2020 14:17:07 +0530 Subject: [PATCH 003/132] Expose support for non-returning atomic FADD Change-Id: If5359488324477315a9bd4f308a75f606c065b39 --- include/hip/hcc_detail/device_library_decls.h | 1 + include/hip/hcc_detail/hip_atomic.h | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/include/hip/hcc_detail/device_library_decls.h b/include/hip/hcc_detail/device_library_decls.h index ac35823cd2..182565ad61 100644 --- a/include/hip/hcc_detail/device_library_decls.h +++ b/include/hip/hcc_detail/device_library_decls.h @@ -72,6 +72,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank( extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); +extern "C" __device__ void __ockl_global_atomic_add_f32(__attribute__((address_space(1))) float*, float); // Introduce local address space #define __local __attribute__((address_space(3))) diff --git a/include/hip/hcc_detail/hip_atomic.h b/include/hip/hcc_detail/hip_atomic.h index 263f639e96..7ccfa6b43e 100644 --- a/include/hip/hcc_detail/hip_atomic.h +++ b/include/hip/hcc_detail/hip_atomic.h @@ -73,6 +73,14 @@ float atomicAdd(float* address, float val) return __uint_as_float(r); } + +__device__ +inline +void atomicAddNoRet(float* address, float val) +{ + 
__ockl_global_atomic_add_f32((__attribute__((address_space(1))) float*)address, val); +} + __device__ inline double atomicAdd(double* address, double val) From 7e45c54ea6d6eaf985385ac8d2539026a3609d26 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Fri, 6 Mar 2020 11:38:44 -0500 Subject: [PATCH 004/132] move new enums to the end to maintain compatibility --- include/hip/hip_runtime_api.h | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 340b01c99d..3d76e92632 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -117,14 +117,6 @@ typedef struct hipDeviceProp_t { int integrated; ///< APU vs dGPU int cooperativeLaunch; ///< HIP device supports cooperative launch int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices - int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple - ///devices with unmatched functions - int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple - ///devices with unmatched grid dimensions - int cooperativeMultiDeviceUnmatchedBlockDim;///< HIP device supports cooperative launch on multiple - ///devices with unmatched block dimensions - int cooperativeMultiDeviceUnmatchedSharedMem;///< HIP device supports cooperative launch on multiple - ///devices with unmatched shared memories int maxTexture1D; ///< Maximum number of elements in 1D images int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements @@ -136,6 +128,14 @@ typedef struct hipDeviceProp_t { int kernelExecTimeoutEnabled; /// Date: Fri, 6 Mar 2020 18:30:12 -0500 Subject: [PATCH 005/132] [HIP] Refactor cooperative APIs --- src/hip_hcc.cpp | 4 +- src/hip_module.cpp | 330 
+++++++++++++++++++++++++++------------------ 2 files changed, 205 insertions(+), 129 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index c9688408c8..0434a3518c 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1619,7 +1619,9 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, // Allows runtime to track some information about the stream. hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired) { - stream = ihipSyncAndResolveStream(stream, lockAcquired); + if (stream == nullptr || stream != stream->getCtx()->_defaultStream){ + stream = ihipSyncAndResolveStream(stream, lockAcquired); + } lp->grid_dim.x = grid.x; lp->grid_dim.y = grid.y; lp->grid_dim.z = grid.z; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index a88abba9cb..8c8c841809 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -140,7 +140,7 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, bool isStreamLocked = 0, - void** impCoopParams = 0) { + void** impCoopParams = 0, hc::accelerator_view* coopAV = 0) { using namespace hip_impl; auto ctx = ihipGetTlsDefaultCtx(); @@ -192,7 +192,7 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global if (impCoopParams) { const auto p{static_cast(*impCoopParams)}; // The sixth index is for multi-grid synchronization - kernargs.insert((kernargs.cend() - padSize - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT, + kernargs.insert((kernargs.cend() - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT, p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT); } @@ -245,6 +245,10 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global hc::completion_future cf; + if 
(coopAV) { + lp.av = coopAV; + } + lp.av->dispatch_hsa_kernel(&aql, kernargs.data(), kernargs.size(), (startEvent || stopEvent) ? &cf : nullptr #if (__hcc_workweek__ > 17312) @@ -399,6 +403,82 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return result; } +void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) +{ + if (f->_is_code_object_v3) { + const auto header = reinterpret_cast(f->_header); + // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) + *usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; + // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) + *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; + *usedLDS = header->group_segment_fixed_size; + } + else { + const auto header = f->_header; + // VGPRs granularity is 4 + *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; + // adding 2 to take into account the 2 VCC registers & handle the granularity of 16 + *usedSGPRS = header->wavefront_sgpr_count + 2; + *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; + *usedLDS = header->workgroup_group_segment_byte_size; + } +} + +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) +{ + using namespace hip_impl; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx == nullptr) { + return hipErrorInvalidDevice; + } + + hipDeviceProp_t prop{}; + ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + + prop.regsPerBlock = prop.regsPerBlock ? 
prop.regsPerBlock : 64 * 1024; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); + + // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + + const size_t simdPerCU = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; + + size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; + + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); + size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? maxWavesPerSimd + : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); + + // Calculate blocks occupancy per CU based on VGPR usage + *numBlocks = vgprs_alu_occupancy / numWavefronts; + + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); + + size_t total_used_lds = usedLDS + dynSharedMemPerBlk; + if (total_used_lds != 0) { + // Calculate LDS occupacy per CU. 
lds_per_cu / (static_lsd + dynamic_lds) + size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; + *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); + } + + return hipSuccess; +} + namespace { // kernel for initializing GWS // nwm1 is the total number of work groups minus 1 @@ -412,13 +492,16 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20093) hipError_t result; - if ((f == nullptr) || (stream == nullptr) || (kernelParams == nullptr)) { + if (f == nullptr || kernelParams == nullptr) { return hipErrorNotInitialized; } + stream = ihipSyncAndResolveStream(stream); + if (!stream->getDevice()->_props.cooperativeLaunch) { return hipErrorInvalidConfiguration; } @@ -459,28 +542,44 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, kd->_kernarg_layout = *reinterpret_cast>*>(kargs.getHandle()); + GET_TLS(); + uint32_t numBlocksPerSm; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kd, + stream->getDevice()->_props.warpSize, sharedMemBytes); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (gridDim.x * gridDim.y * gridDim.z > maxActiveBlocks){ + return hipErrorCooperativeLaunchTooLarge; + } void *gwsKernelParam[1]; // calculate total number of work groups minus 1 for the main kernel uint nwm1 = (gridDim.x * gridDim.y * gridDim.z) - 1; gwsKernelParam[0] = &nwm1; - LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) - streamCrit->_av.acquire_locked_hsa_queue(); -#endif + hc::accelerator acc = stream->getDevice()->_acc; + // create a cooperative accelerated view for launching gws and main kernels + 
hc::accelerator_view coopAV = acc.create_cooperative_view(); - GET_TLS(); - // launch the init_gws kernel to initialize the GWS + // wait for this stream to finish operations + stream->locked_wait(); + + LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); + streamCrit->_av.acquire_locked_hsa_queue(); + coopAV.acquire_locked_hsa_queue(); + + // launch the init_gws kernel to initialize the GWS in the dedicated cooperative queue result = ihipModuleLaunchKernel(tls, gwsKD, 1, 1, 1, 1, 1, 1, - 0, stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr , &coopAV); if (result != hipSuccess) { stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) stream->criticalData()._av.release_locked_hsa_queue(); -#endif - + coopAV.release_locked_hsa_queue(); return hipErrorLaunchFailure; } @@ -488,60 +587,88 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, void* impCoopParams[1]; impCoopParams[0] = &impCoopArg; - // launch the main kernel + // launch the main kernel in the cooperative queue result = ihipModuleLaunchKernel(tls, kd, gridDim.x * blockDimX.x, gridDim.y * blockDimX.y, gridDim.z * blockDimX.z, blockDimX.x, blockDimX.y, blockDimX.z, sharedMemBytes, stream, kernelParams, nullptr, nullptr, - nullptr, 0, true, impCoopParams); + nullptr, 0, true, impCoopParams, &coopAV); stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAV.release_locked_hsa_queue(); + + // wait on the dispatch on the dedicated cooperative queue to finish + coopAV.wait(hc::hcWaitModeActive); + return result; +#else + return hipErrorInvalidConfiguration; +#endif + } __attribute__((visibility("default"))) hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20093) hipError_t 
result; if (numDevices > g_deviceCnt || launchParamsList == nullptr || numDevices > MAX_COOPERATIVE_GPUs) { return hipErrorInvalidValue; } + vector streams; + vector deviceIDs; + // check to see if we have valid distinct streams/devices, if cooperative multi device + // launch is supported and if grid/block dimensions are valid for (int i = 0; i < numDevices; ++i) { - if (!launchParamsList[i].stream->getDevice()->_props.cooperativeMultiDeviceLaunch) { + const hipLaunchParams& lp = launchParamsList[i]; + + if (lp.stream == nullptr){ + return hipErrorInvalidResourceHandle; + } + + auto it = find(streams.begin(), streams.end(), lp.stream); + if (it == streams.end()){ + streams.push_back(lp.stream); + } else{ + return hipErrorInvalidDevice; + } + + const ihipDevice_t* currentDevice = lp.stream->getDevice(); + auto it1 = find(deviceIDs.begin(), deviceIDs.end(), currentDevice->_deviceId); + if (it1 == deviceIDs.end()){ + deviceIDs.push_back(currentDevice->_deviceId); + } else { + return hipErrorInvalidDevice; + } + + if (!currentDevice->_props.cooperativeMultiDeviceLaunch) { + return hipErrorInvalidConfiguration; + } + + if (lp.gridDim.x == 0 || lp.gridDim.y == 0 || lp.gridDim.z == 0 || + lp.blockDim.x == 0 || lp.blockDim.y == 0 || lp.blockDim.z == 0){ return hipErrorInvalidConfiguration; } } - hipFunction_t* gwsKds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - - if (kds == nullptr || gwsKds == nullptr) { - return hipErrorNotInitialized; - } + vector gwsKds; + vector kds; + GET_TLS(); // prepare all kernel descriptors for initializing the GWS and the main kernels per device for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; - if (lp.stream == nullptr) { - free(gwsKds); - free(kds); - return hipErrorNotInitialized; - } - gwsKds[i] = ps.kernel_descriptor(reinterpret_cast(&init_gws), - hip_impl::target_agent(lp.stream)); + 
gwsKds.push_back(ps.kernel_descriptor(reinterpret_cast(&init_gws), + hip_impl::target_agent(lp.stream))); if (gwsKds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align gwsKargs = ps.get_kernargs_size_align( @@ -550,23 +677,42 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKargs.getHandle()); - kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), - hip_impl::target_agent(lp.stream)); + kds.push_back(ps.kernel_descriptor(reinterpret_cast(lp.func), + hip_impl::target_agent(lp.stream))); if (kds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); + + uint32_t numBlocksPerSm; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kds[i], + lp.stream->getDevice()->_props.warpSize, lp.sharedMem); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * lp.stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (lp.gridDim.x * lp.gridDim.y * lp.gridDim.z > maxActiveBlocks){ + return hipErrorCooperativeLaunchTooLarge; + } + } + + vector coopAVs; + + // create cooperative accelerated views for launching gws and main kernels on each device + for (int i = 0; i < numDevices; ++i) { + hc::accelerator acc = launchParamsList[i].stream->getDevice()->_acc; + coopAVs.push_back(acc.create_cooperative_view()); } mg_sync *mg_sync_ptr = 0; - mg_info *mg_info_ptr[MAX_COOPERATIVE_GPUs] = {0}; + vector mg_info_ptr; + - GET_TLS(); result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault); if (result != hipSuccess) { return hipErrorInvalidValue; @@ -576,7 +722,8 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL 
uint all_sum = 0; for (int i = 0; i < numDevices; ++i) { - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_ptr[i], sizeof(mg_info), hipHostMallocDefault); + mg_info *mg_info_temp; + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault); if (result != hipSuccess) { hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < i; ++j) { @@ -584,6 +731,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } return hipErrorInvalidValue; } + mg_info_ptr.push_back(mg_info_temp); // calculate the sum of sizes of all grids const hipLaunchParams& lp = launchParamsList[i]; all_sum += lp.blockDim.x * lp.blockDim.y * lp.blockDim.z * @@ -592,10 +740,10 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // lock all streams before launching the blit kernels for initializing the GWS and main kernels to each device for (int i = 0; i < numDevices; ++i) { + launchParamsList[i].stream->locked_wait(); LockedAccessor_StreamCrit_t streamCrit(launchParamsList[i].stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) streamCrit->_av.acquire_locked_hsa_queue(); -#endif + coopAVs[i].acquire_locked_hsa_queue(); } // launch the init_gws kernel to initialize the GWS for each device @@ -607,14 +755,13 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKernelParam[0] = &nwm1; result = ihipModuleLaunchKernel(tls, gwsKds[i], 1, 1, 1, 1, 1, 1, - 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for 
(int j = 0; j < numDevices; ++j) { @@ -660,14 +807,13 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL lp.blockDim.x, lp.blockDim.y, lp.blockDim.z, lp.sharedMem, lp.stream, lp.args, nullptr, nullptr, nullptr, 0, - true, impCoopParams); + true, impCoopParams, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { @@ -682,13 +828,14 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // unlock all streams for (int i = 0; i < numDevices; ++i) { launchParamsList[i].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } - free(gwsKds); - free(kds); + // wait on the dispatch on cooperative queues on each device to finish + for (int i = 0; i < numDevices; ++i) { + coopAVs[i].wait(hc::hcWaitModeActive); + } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { @@ -696,6 +843,9 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } return result; +#else + return hipErrorInvalidConfiguration; +#endif } namespace hip_impl { @@ -1264,29 +1414,6 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const return ihipLogStatus(hipSuccess); } -void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) -{ - if (f->_is_code_object_v3) { - const auto header = reinterpret_cast(f->_header); - // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) - 
*usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; - // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) - *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; - *usedLDS = header->group_segment_fixed_size; - } - else { - const auto header = f->_header; - // VGPRs granularity is 4 - *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; - // adding 2 to take into account the 2 VCC registers & handle the granularity of 16 - *usedSGPRS = header->wavefront_sgpr_count + 2; - *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; - *usedLDS = header->workgroup_group_segment_byte_size; - } -} - hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) @@ -1409,59 +1536,6 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); } -hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) -{ - using namespace hip_impl; - - auto ctx = ihipGetTlsDefaultCtx(); - if (ctx == nullptr) { - return hipErrorInvalidDevice; - } - - hipDeviceProp_t prop{}; - ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); - - prop.regsPerBlock = prop.regsPerBlock ? 
prop.regsPerBlock : 64 * 1024; - - size_t usedVGPRS = 0; - size_t usedSGPRS = 0; - size_t usedLDS = 0; - getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); - - // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 - size_t wavefrontSize = prop.warpSize; - size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); - - const size_t simdPerCU = 4; - const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; - - size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; - - size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); - size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? maxWavesPerSimd - : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); - - // Calculate blocks occupancy per CU based on VGPR usage - *numBlocks = vgprs_alu_occupancy / numWavefronts; - - const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; - size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? maxWavesPerSimd - : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); - - // Calculate blocks occupancy per CU based on SGPR usage - *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); - - size_t total_used_lds = usedLDS + dynSharedMemPerBlk; - if (total_used_lds != 0) { - // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) - size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; - *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); - } - - return hipSuccess; -} - hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) { From 09130b3b9206c7a137562f77cb5e1d3de4d936f3 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Fri, 6 Mar 2020 11:47:21 +0530 Subject: [PATCH 006/132] separate printf declaration for vdi/clang There are now two implementations of printf in HIP: 1. 
The implementation for HCC is controlled by the HC_FEATURE_PRINTF macro, and it works only with the HCC compiler used in combination with the HCC runtime. 2. The implementation for hip-clang requires the VDI runtime, and is always enabled with that combination. --- bin/hipcc | 1 + include/hip/hcc_detail/device_functions.h | 9 +++++++++ include/hip/hcc_detail/hip_runtime.h | 10 +++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 06a3f9e385..debc1d46c9 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -133,6 +133,7 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { } else { $HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH } + $HIPCXXFLAGS .= "-D__HIP_VDI__"; } if (defined $HIP_VDI_HOME) { diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 7096841da8..e3544a491e 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -33,6 +33,15 @@ THE SOFTWARE. #include #include #include + +#if __HIP_CLANG_ONLY__ +#if __HIP_VDI__ +extern "C" __device__ int printf(const char *fmt, ...); +#else +static inline __device__ void printf(const char* format, All... all) {} +#endif +#endif + /* Integer Intrinsics */ diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index e86611b213..c1ad5b2fe5 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -311,15 +311,19 @@ extern "C" __device__ void* __hip_free(void* ptr); static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } -#if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF) +// Declare printf only for the HCC compiler. hip-clang is handled in +// device_functions.h +#if __HCC_ACCELERATOR__ +#if HC_FEATURE_PRINTF template static inline __device__ void printf(const char* format, All... 
all) { hc::printf(format, all...); } -#elif defined(__HCC_ACCELERATOR__) || __HIP__ +#else template static inline __device__ void printf(const char* format, All... all) {} -#endif +#endif // HC_FEATURE_PRINTF +#endif // __HCC_ACCELERATOR__ #endif //__HCC_OR_HIP_CLANG__ From 5494f5b247a4c30f09014cb352c2cf9ae3723888 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Mon, 9 Mar 2020 16:03:59 -0400 Subject: [PATCH 007/132] [HIP] fix formatting/code clean up and fix a bug --- src/hip_hcc.cpp | 2 +- src/hip_module.cpp | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 0434a3518c..5a02595aa2 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1619,7 +1619,7 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, // Allows runtime to track some information about the stream. hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired) { - if (stream == nullptr || stream != stream->getCtx()->_defaultStream){ + if (stream == nullptr || stream != stream->getCtx()->_defaultStream) { stream = ihipSyncAndResolveStream(stream, lockAcquired); } lp->grid_dim.x = grid.x; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 8c8c841809..6689d05081 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -192,8 +192,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global if (impCoopParams) { const auto p{static_cast(*impCoopParams)}; // The sixth index is for multi-grid synchronization - kernargs.insert((kernargs.cend() - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT, - p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT); + copy(p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT, + (kernargs.end() - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT); } /* @@ -543,16 +543,16 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, 
std::pair>*>(kargs.getHandle()); GET_TLS(); - uint32_t numBlocksPerSm; + uint32_t numBlocksPerSm = 0; result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kd, - stream->getDevice()->_props.warpSize, sharedMemBytes); + blockDimX.x * blockDimX.y * blockDimX.z, sharedMemBytes); if (result != hipSuccess) { return hipErrorLaunchFailure; } int maxActiveBlocks = numBlocksPerSm * stream->getDevice()->_props.multiProcessorCount; //check to see if the workload fits on the GPU - if (gridDim.x * gridDim.y * gridDim.z > maxActiveBlocks){ + if (gridDim.x * gridDim.y * gridDim.z > maxActiveBlocks) { return hipErrorCooperativeLaunchTooLarge; } @@ -603,7 +603,6 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, // wait on the dispatch on the dedicated cooperative queue to finish coopAV.wait(hc::hcWaitModeActive); - return result; #else return hipErrorInvalidConfiguration; @@ -633,16 +632,14 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL return hipErrorInvalidResourceHandle; } - auto it = find(streams.begin(), streams.end(), lp.stream); - if (it == streams.end()){ + if (find(streams.begin(), streams.end(), lp.stream) == streams.end()) { streams.push_back(lp.stream); - } else{ + } else { return hipErrorInvalidDevice; } const ihipDevice_t* currentDevice = lp.stream->getDevice(); - auto it1 = find(deviceIDs.begin(), deviceIDs.end(), currentDevice->_deviceId); - if (it1 == deviceIDs.end()){ + if (find(deviceIDs.begin(), deviceIDs.end(), currentDevice->_deviceId) == deviceIDs.end()) { deviceIDs.push_back(currentDevice->_deviceId); } else { return hipErrorInvalidDevice; @@ -687,16 +684,16 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); - uint32_t numBlocksPerSm; + uint32_t numBlocksPerSm = 0; result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kds[i], - 
lp.stream->getDevice()->_props.warpSize, lp.sharedMem); + lp.blockDim.x * lp.blockDim.y * lp.blockDim.z, lp.sharedMem); if (result != hipSuccess) { return hipErrorLaunchFailure; } int maxActiveBlocks = numBlocksPerSm * lp.stream->getDevice()->_props.multiProcessorCount; //check to see if the workload fits on the GPU - if (lp.gridDim.x * lp.gridDim.y * lp.gridDim.z > maxActiveBlocks){ + if (lp.gridDim.x * lp.gridDim.y * lp.gridDim.z > maxActiveBlocks) { return hipErrorCooperativeLaunchTooLarge; } } @@ -722,7 +719,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL uint all_sum = 0; for (int i = 0; i < numDevices; ++i) { - mg_info *mg_info_temp; + mg_info *mg_info_temp = nullptr; result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault); if (result != hipSuccess) { hip_internal::ihipHostFree(tls, mg_sync_ptr); From fea3017168e0d4af55f72187012e12e63c5341f1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 10 Mar 2020 18:04:01 +0300 Subject: [PATCH 008/132] [HIP][doc] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2bffd12162..c2e2a7a456 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ## What is this repository for? ### -HIP allows developers to convert CUDA code to portable C++. The same source code can be compiled to run on NVIDIA or AMD GPUs. +**HIP is a C++ Runtime API and Kernel Language that allows developers to create portable applications for AMD and NVIDIA GPUs from single source code.** + Key features include: * HIP is very thin and has little or no performance impact over coding directly in CUDA or hcc "HC" mode. 
From 09edc7e49ce4c8210aff2dc3d88e03e6846a6a51 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Tue, 10 Mar 2020 12:04:05 -0500 Subject: [PATCH 009/132] Fix incorrect shfl_xor for Windows copy/paste error, need __shfl_xor w/ lane_mask --- include/hip/hcc_detail/device_functions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index e3544a491e..6e6756fd9c 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -557,7 +557,7 @@ long __shfl_xor(long var, int lane_mask, int width = warpSize) return tmp1; #else static_assert(sizeof(long) == sizeof(int), ""); - return static_cast(__shfl_down(static_cast(var), lane_delta, width)); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); #endif } __device__ From b663fccf0bd90f0c75f7bb98aba55601303ea556 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Tue, 10 Mar 2020 15:26:53 -0400 Subject: [PATCH 010/132] [HIP] return an error if blockDim exceeds maxThreadsPerBlock --- src/hip_module.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 6689d05081..65c218c92d 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -502,7 +502,8 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, stream = ihipSyncAndResolveStream(stream); - if (!stream->getDevice()->_props.cooperativeLaunch) { + if (!stream->getDevice()->_props.cooperativeLaunch || + blockDimX.x * blockDimX.y * blockDimX.z > stream->getDevice()->_props.maxThreadsPerBlock) { return hipErrorInvalidConfiguration; } @@ -650,7 +651,8 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } if (lp.gridDim.x == 0 || lp.gridDim.y == 0 || lp.gridDim.z == 0 || - lp.blockDim.x == 0 || lp.blockDim.y == 0 || lp.blockDim.z == 0){ + lp.blockDim.x == 0 || lp.blockDim.y == 0 || lp.blockDim.z == 0 || + 
lp.blockDim.x * lp.blockDim.y * lp.blockDim.z > currentDevice->_props.maxThreadsPerBlock){ return hipErrorInvalidConfiguration; } } From 65a790bc088e1b23b2dc8f49d627c9662151f0ef Mon Sep 17 00:00:00 2001 From: srinivamd <52507740+srinivamd@users.noreply.github.com> Date: Wed, 11 Mar 2020 02:02:54 -0700 Subject: [PATCH 011/132] return hipSuccess when count is zero (#1900) --- src/hip_memory.cpp | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 8159f22a97..4a126532f4 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -478,6 +478,10 @@ void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool s hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hip_status = hipSuccess; + if (sizeBytes == 0) { + return hipSuccess; + } + if (HIP_SYNC_HOST_ALLOC) { hipDeviceSynchronize(); } @@ -485,10 +489,6 @@ hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned i auto ctx = ihipGetTlsDefaultCtx(); if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - hip_status = hipSuccess; - // TODO - should size of 0 return err or be siliently ignored? 
} else { unsigned trueFlags = flags; if (flags == hipHostMallocDefault) { @@ -673,14 +673,15 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + auto ctx = ihipGetTlsDefaultCtx(); // return NULL pointer when malloc size is 0 if ( nullptr == ctx || nullptr == ptr) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; } else { auto device = ctx->getWriteableDevice(); *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/, @@ -700,14 +701,15 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_SET_DEVICE(); #if (__hcc_workweek__ >= 19115) + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + hipError_t hip_status = hipSuccess; auto ctx = ihipGetTlsDefaultCtx(); - // return NULL pointer when malloc size is 0 - if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; - } else if ((ctx == nullptr) || (ptr == nullptr)) { + if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; } else { unsigned amFlags = 0; @@ -736,6 +738,9 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags); HIP_SET_DEVICE(); + if (sizeBytes == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags); return ihipLogStatus(hip_status); @@ -744,6 +749,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) { HIP_INIT_SPECIAL_API(hipMallocManaged, (TRACE_MEM), devPtr, size, flags); HIP_SET_DEVICE(); 
+ if (size == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; if(flags != hipMemAttachGlobal) hip_status = hipErrorInvalidValue; @@ -1224,6 +1232,7 @@ hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1246,6 +1255,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1269,6 +1279,7 @@ hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1301,6 +1312,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1592,6 +1604,7 @@ hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bo srcXoffset = p->srcPos.x; srcYoffset = p->srcPos.y; srcZoffset = p->srcPos.z; + if (copyWidth == 0) return hipSuccess; if (p->dstArray != nullptr) { if ((p->dstArray->isDrv == true) ||( p->dstPtr.ptr!= nullptr)){ return hipErrorInvalidValue; @@ -1933,6 +1946,7 @@ hipError_t getLockedPointer(void *hostPtr, size_t dataLen, void **devicePtrPtr) // TODO - review and optimize hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + if (height == 0 
|| width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); @@ -1989,6 +2003,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, hipError_t ihipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + if (height == 0 || width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipError_t e = hipSuccess; int isLockedOrD2D = 0; @@ -2043,6 +2058,7 @@ hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t size_t height, size_t srcXOffsetInBytes, size_t srcYOffset, size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind, hipStream_t stream, bool isAsync) { + if (height == 0 || width == 0) return hipSuccess; if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){ return hipErrorInvalidValue; } else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){ @@ -2061,6 +2077,7 @@ hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool if (pCopy == nullptr) { return hipErrorInvalidValue; } + if (pCopy->Height == 0 || pCopy->WidthInBytes == 0) return hipSuccess; void* dst; const void* src; size_t spitch = pCopy->srcPitch; size_t dpitch = pCopy->dstPitch; @@ -2140,6 +2157,7 @@ hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){ HIP_INIT_SPECIAL_API(hipMemcpy2DFromArrayAsync, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind, stream); size_t byteSize; + if (height == 0 || width == 0) return ihipLogStatus(hipSuccess); if(src) { switch 
(src->desc.f) { case hipChannelFormatKindSigned: From 774035d8693148731f940cf7541cf22d38cb3b4c Mon Sep 17 00:00:00 2001 From: mhbliao <47895780+mhbliao@users.noreply.github.com> Date: Tue, 17 Mar 2020 04:29:24 -0400 Subject: [PATCH 012/132] [hip] Improve the portability of the header for vector type support. (#1873) - Need to check the availability of `__has_attribute` builtin macro instead of compiler versions. That's more reliable and portable among various compilers. - Provides a very basic support of vectors for unknown compilers. --- include/hip/hcc_detail/hip_vector_types.h | 93 ++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/hip_vector_types.h b/include/hip/hcc_detail/hip_vector_types.h index 39457795ae..7a91b6a532 100644 --- a/include/hip/hcc_detail/hip_vector_types.h +++ b/include/hip/hcc_detail/hip_vector_types.h @@ -34,7 +34,7 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -#if !defined(_MSC_VER) || __clang__ +#if defined(__has_attribute) #if __has_attribute(ext_vector_type) #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) #else @@ -1241,7 +1241,9 @@ DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); -#else // defined(_MSC_VER) +#else // !defined(__has_attribute) + +#if defined(_MSC_VER) #include #include #include @@ -1347,5 +1349,92 @@ typedef union { double4 data; } double3; typedef union { __m256d data[2]; } double8; typedef union { __m256d data[4]; } double16; +#else // !defined(_MSC_VER) + +typedef union { char data; } char1; +typedef union { char data[2]; } char2; +typedef union { char data[4]; } char4; +typedef union { char data[8]; } char8; +typedef union { char data[16]; } char16; +typedef union { char4 data; } char3; + +typedef union { unsigned char data; } uchar1; +typedef union 
{ unsigned char data[2]; } uchar2; +typedef union { unsigned char data[4]; } uchar4; +typedef union { unsigned char data[8]; } uchar8; +typedef union { unsigned char data[16]; } uchar16; +typedef union { uchar4 data; } uchar3; + +typedef union { short data; } short1; +typedef union { short data[2]; } short2; +typedef union { short data[4]; } short4; +typedef union { short data[8]; } short8; +typedef union { short data[16]; } short16; +typedef union { short4 data; } short3; + +typedef union { unsigned short data; } ushort1; +typedef union { unsigned short data[2]; } ushort2; +typedef union { unsigned short data[4]; } ushort4; +typedef union { unsigned short data[8]; } ushort8; +typedef union { unsigned short data[16]; } ushort16; +typedef union { ushort4 data; } ushort3; + +typedef union { int data; } int1; +typedef union { int data[2]; } int2; +typedef union { int data[4]; } int4; +typedef union { int data[8]; } int8; +typedef union { int data[16]; } int16; +typedef union { int4 data; } int3; + +typedef union { unsigned int data; } uint1; +typedef union { unsigned int data[2]; } uint2; +typedef union { unsigned int data[4]; } uint4; +typedef union { unsigned int data[8]; } uint8; +typedef union { unsigned int data[16]; } uint16; +typedef union { uint4 data; } uint3; + +typedef union { long data; } long1; +typedef union { long data[2]; } long2; +typedef union { long data[4]; } long4; +typedef union { long data[8]; } long8; +typedef union { long data[16]; } long16; +typedef union { long4 data; } long3; + +typedef union { unsigned long data; } ulong1; +typedef union { unsigned long data[2]; } ulong2; +typedef union { unsigned long data[4]; } ulong4; +typedef union { unsigned long data[8]; } ulong8; +typedef union { unsigned long data[16]; } ulong16; +typedef union { ulong4 data; } ulong3; + +typedef union { long long data; } longlong1; +typedef union { long long data[2]; } longlong2; +typedef union { long long data[4]; } longlong4; +typedef union { long long data[8]; 
} longlong8; +typedef union { long long data[16]; } longlong16; +typedef union { longlong4 data; } longlong3; + +typedef union { unsigned long long data; } ulonglong1; +typedef union { unsigned long long data[2]; } ulonglong2; +typedef union { unsigned long long data[4]; } ulonglong4; +typedef union { unsigned long long data[8]; } ulonglong8; +typedef union { unsigned long long data[16]; } ulonglong16; +typedef union { ulonglong4 data; } ulonglong3; + +typedef union { float data; } float1; +typedef union { float data[2]; } float2; +typedef union { float data[4]; } float4; +typedef union { float data[8]; } float8; +typedef union { float data[16]; } float16; +typedef union { float4 data; } float3; + +typedef union { double data; } double1; +typedef union { double data[2]; } double2; +typedef union { double data[4]; } double4; +typedef union { double data[8]; } double8; +typedef union { double data[16]; } double16; +typedef union { double4 data; } double3; + #endif // defined(_MSC_VER) +#endif // defined(__has_attribute) #endif From 7aa96116890efcee73bb3317592223030fd944d2 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 17 Mar 2020 04:30:20 -0400 Subject: [PATCH 013/132] Let hipcc not pass -mllvm option to HIP-Clang on Windows (#1924) Currently there is a clang bug on Windows causing duplicate -mllvm options in clang -cc1. Tempoarily disable -mllvm options for HIP-Clang on Windows until the bug is fixed. Change-Id: I3a4393ba7745989398dc6c6001722837dad18704 --- bin/hipcc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/hipcc b/bin/hipcc index debc1d46c9..bc75501621 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -711,7 +711,9 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -O3"; $HIPLDFLAGS .= " -O3"; } - if ($optArg ne "-O0") { + # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. + # ToDo : remove restriction for Windows after clang bug is fixed. 
+ if ($optArg ne "-O0" and not $isWindows) { $HIPCXXFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; if ($needLDFLAGS and not $needCXXFLAGS) { $HIPLDFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; From bf04d7380a2a817f5b4d2d8b01867c2dc2d9dd8d Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:30:38 -0500 Subject: [PATCH 014/132] Fix errors in occupancy calculation function (#1926) Fix two errors in hipOccupancyMaxActiveBlocksPerMultiprocessor. 1) Fix a possible segfault if the user passed in a null pointer for the numBlocks value. 2) Handle the situation when the user is asking for a block size that is larger than what the target device can hold within a single block. --- src/hip_module.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 65c218c92d..6ec260b58a 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -426,7 +426,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size } } -hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( +static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) { using namespace hip_impl; @@ -435,10 +435,18 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( if (ctx == nullptr) { return hipErrorInvalidDevice; } + if (numBlocks == nullptr) { + return hipErrorInvalidValue; + } hipDeviceProp_t prop{}; ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + if (blockSize > prop.maxThreadsPerBlock) { + *numBlocks = 0; + return hipSuccess; + } + prop.regsPerBlock = prop.regsPerBlock ? 
prop.regsPerBlock : 64 * 1024; size_t usedVGPRS = 0; From 55e55e78bb9cdf7443ea827f93891fc3cd4f83ca Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:30:51 -0500 Subject: [PATCH 015/132] Fix maxSharedMemoryPerMultiProcessor attribute (#1927) The maxSharedMemoryPerMultiProcessor attribute is meant to describe the number of bytes of shared memory (LDS space in AMD terminology) in each SM (CU in AMD terminology). For instance, on AMD GPUs this is often 64KB per CU, and on some Nvidia GPUs it's 96KB per SM. This shared memory is a different address space from the normal global memory. However, the current HIP-HCC properties fill this in with a size that matches the totalGlobalMem property. This gives a drastically too-high calculation for the amount of LDS space that each CU has -- tens of GBs vs. 10s of KBs. This patch fixes this by pulling the maxSharedMemoryPerMultiProcessor property from the HSA pool that describes how much workgroup-local space is available on each CU. The HSA runtime eventually pulls this from the topology information about LDSSizeInKB, defined as "Size of Local Data Store in Kilobytes per SIMD". Previously, this HSA query was used to fill in the value of the sharedMemPerBlock property. On today's AMD GPUs, we know that the amount of LDS available to the workgroup is identical to the amount of LDS space in the CU. However, in the future this may differ. As such, this patch changes around the order and fills in the "PerMultiProcessor" property from the HSA query (since that's what the query is defined to return), and then separately fills in the "PerBlock" property as we know it. 
--- samples/1_Utils/hipInfo/hipInfo.cpp | 3 ++- src/hip_hcc.cpp | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index e17f19675a..14faa7671b 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -56,6 +56,7 @@ void printCompilerInfo() { #endif } +double bytesToKB(size_t s) { return (double)s / (1024.0); } double bytesToGB(size_t s) { return (double)s / (1024.0 * 1024.0 * 1024.0); } #define printLimit(w1, limit, units) \ @@ -97,7 +98,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) - << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl; + << bytesToKB(props.maxSharedMemoryPerMultiProcessor) << " KB" << endl; cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl; cout << setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 5fb7c53260..be08430bc3 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -677,7 +677,7 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &(p_prop->sharedMemPerBlock)); + &(p_prop->maxSharedMemoryPerMultiProcessor)); break; default: break; @@ -835,10 +835,8 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { hsa_region_t* am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); - // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. 
- // Group memory will not be paged out, so, the physical memory size is the total shared memory - // size, and also equal to the group pool size. - prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; + // Current GPUs allow a workgroup to use all of LDS in a CU, so these two are equal. + prop->sharedMemPerBlock = prop->maxSharedMemoryPerMultiProcessor; // Get Max memory clock frequency err = From 015895a265facfa53483e51789d65c2012705ab5 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Tue, 17 Mar 2020 04:31:11 -0400 Subject: [PATCH 016/132] [HIP] add cooperative kernel launch APIs on NVCC (#1929) --- include/hip/nvcc_detail/hip_runtime_api.h | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 04f7a429df..a1f90e8e10 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -186,6 +186,7 @@ typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_t; typedef struct cudaArray* hipArray_const_t; typedef struct cudaFuncAttributes hipFuncAttributes; +typedef struct cudaLaunchParams hipLaunchParams; #define hipFunction_attribute CUfunction_attribute #define hip_Memcpy2D CUDA_MEMCPY2D #define hipMemcpy3DParms cudaMemcpy3DParms @@ -1275,6 +1276,12 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att case hipDeviceAttributeEccEnabled: cdattr = cudaDevAttrEccEnabled; break; + case hipDeviceAttributeCooperativeLaunch: + cdattr = cudaDevAttrCooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + cdattr = cudaDevAttrCooperativeMultiDeviceLaunch; + break; default: return hipCUDAErrorTohipError(cudaErrorInvalidValue); } @@ -1683,6 +1690,17 @@ inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_ return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); } +inline static hipError_t 
hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + +inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)); +} #ifdef __cplusplus } @@ -1746,6 +1764,14 @@ template inline static hipChannelFormatDesc hipCreateChannelDesc() { return cudaCreateChannelDesc(); } + +template +inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + #endif //__CUDACC__ #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H From bbbb8bf1e6401dec1cdd8d529a33c34731c135c7 Mon Sep 17 00:00:00 2001 From: Reshabh Sharma <60782296+rksharma-yymd@users.noreply.github.com> Date: Tue, 17 Mar 2020 14:01:33 +0530 Subject: [PATCH 017/132] Don't force compiler to treat libhip_hcc.so as a text file (#1931) Fixes SWDEV-226025, Right now -x c++ can come before libhip_hcc.so which forces the compiler to treat libhip_hcc.so as a text file and generates a lot of gibberish unicode. This PR changes the order of flags ensuring that -x c++ and similar flags come after libhip_hcc.so Hopefully, this will not have any negative side effect. 
--- bin/hipcc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index bc75501621..26ea4114eb 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -735,12 +735,12 @@ if ($HIPCC_LINK_FLAGS_APPEND) { } my $CMD="$HIPCC"; -if ($needCXXFLAGS) { - $CMD .= " $HIPCXXFLAGS"; -} if ($needLDFLAGS and not $compileOnly) { $CMD .= " $HIPLDFLAGS"; } +if ($needCXXFLAGS) { + $CMD .= " $HIPCXXFLAGS"; +} $CMD .= " $toolArgs"; if ($verbose & 0x1) { From 18e6c529bc20047551c4cbc09216a4df8cbbef49 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:31:44 -0500 Subject: [PATCH 018/132] Fix detection of support for cooperative groups (#1932) Query ROCr to see if we have the proper lower-level support for cooperative groups -- GWS support through the firmware, driver, thunk, and ROCr. ROCr does these checks for us, and presents a query that allows us to see if GWS entries are available for use. If so, then we have all the lower-level technologies needed, and we should enable cooperative groups support for HIP. --- src/hip_hcc.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index be08430bc3..3f7128e964 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -895,9 +895,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { prop->integrated = 1; } - // Enable the cooperative group for gfx9+ - prop->cooperativeLaunch = (prop->gcnArch < 900) ? 0 : 1; - prop->cooperativeMultiDeviceLaunch = (prop->gcnArch < 900) ? 
0 : 1; + // Enable the cooperative group for GPUs that support all the required features + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES, + &prop->cooperativeLaunch); + DeviceErrorCheck(err); + prop->cooperativeMultiDeviceLaunch = prop->cooperativeLaunch; prop->cooperativeMultiDeviceUnmatchedFunc = prop->cooperativeMultiDeviceLaunch; prop->cooperativeMultiDeviceUnmatchedGridDim = prop->cooperativeMultiDeviceLaunch; From 320742e8a07edaf375e6eb81403231b8406d3750 Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Tue, 17 Mar 2020 14:02:00 +0530 Subject: [PATCH 019/132] Fix __sad signature match with Cuda (#1936) Fix for issue #1930 --- include/hip/hcc_detail/device_functions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 6e6756fd9c..d8c7f62550 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -122,7 +122,7 @@ __device__ static int __mul24(int x, int y); __device__ static long long int __mul64hi(long long int x, long long int y); __device__ static int __mulhi(int x, int y); __device__ static int __rhadd(int x, int y); -__device__ static unsigned int __sad(int x, int y, int z); +__device__ static unsigned int __sad(int x, int y,unsigned int z); __device__ static unsigned int __uhadd(unsigned int x, unsigned int y); __device__ static int __umul24(unsigned int x, unsigned int y); __device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); @@ -193,7 +193,7 @@ __device__ static inline int __rhadd(int x, int y) { int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline unsigned int __sad(int x, int y, int z) { +__device__ static inline unsigned int __sad(int x, int y, unsigned int z) { return x > y ? 
x - y + z : y - x + z; } __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { From 7bcfdf017d5c0c32ed7b614317fb1392b5264fa8 Mon Sep 17 00:00:00 2001 From: Reshabh Sharma <60782296+rksharma-yymd@users.noreply.github.com> Date: Tue, 17 Mar 2020 14:02:14 +0530 Subject: [PATCH 020/132] Output file name should not change flags picked for compiler (#1938) Fixes SWDEV-207362, The output file name should not contribute to picking up the right flags for the compiler. This fix solves issues when the output has conflicting extensions which confuses hipcc to treat them as the source files and add the required flags for them. PS: Output file refers to the file followed by -o Example: hipcc test.o -o test.hip will add the flags for .hip compilation ignoring the fact that it is an output file --- bin/hipcc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bin/hipcc b/bin/hipcc index 26ea4114eb..929fe1cd71 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -375,6 +375,7 @@ my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool my $optArg = ""; # -O args my $targetOpt = '--amdgpu-target='; my $targetsStr = ""; +my $skipOutputFile = 0; # file followed by -o should not contribute to picking compiler flags foreach $arg (@ARGV) { @@ -386,8 +387,16 @@ foreach $arg (@ARGV) $needCXXFLAGS = 1; $needLDFLAGS = 0; } + + if ($skipOutputFile) { + $toolArgs .= " $arg"; + $skipOutputFile = 0; + next; + } + if ($arg eq '-o') { $needLDFLAGS = 1; + $skipOutputFile = 1; } if(($trimarg eq '-stdlib=libc++') and ($setStdLib eq 0)) From 4128d68ed7be3d5e3cea42f0ef07c43dc86a0837 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:32:48 -0500 Subject: [PATCH 021/132] Fix occupancy calculations API on NVCC (#1941) NVCC warned if you tried to use hipOccupancyMaxActiveBlocksPerMultiprocessor because when passing in a device function pointer, "const void* func" was insufficient to describe it accurately.
Adding a C++ templated class type definition for this function. --- include/hip/nvcc_detail/hip_runtime_api.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index a1f90e8e10..fe17ed79f2 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -1708,6 +1708,17 @@ inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* #ifdef __CUDACC__ +template <class T> +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + T func, + int blockSize, + size_t dynamicSMemSize) { + cudaError_t cerror; + cerror = + cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); + return hipCUDAErrorTohipError(cerror); +} + template <class T> inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, size_t dynamicSMemSize = 0, From f7e85649f47dc22be26091d37ebdaae874fa4d28 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:32:59 -0500 Subject: [PATCH 022/132] Fix compiler warning on NVCC path (#1942) GCC emits a warning about using static functions like hipCUDAErrorTohipError inside this function, because it has an inline directive, but it's not static. Adding static to this function to silence warnings (and prevent potential problems in the future).
--- include/hip/nvcc_detail/hip_runtime_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index fe17ed79f2..d9eb3e4146 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -861,7 +861,7 @@ inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, } -inline hipError_t hipMemcpyWithStream(void* dst, const void* src, +inline static hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream) { cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, From 899c87870377b259b4911644684c0fa3545a984b Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <41661541+ssahasra@users.noreply.github.com> Date: Tue, 17 Mar 2020 14:03:27 +0530 Subject: [PATCH 023/132] enable HCC printf when using hip-clang (#1947) This allows printf to work with hip-clang and HCC runtime. See comments under #1919 for a reported bug and feature request. --- include/hip/hcc_detail/device_functions.h | 14 ++++++++++---- include/hip/hcc_detail/hip_runtime.h | 14 -------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index d8c7f62550..66a99108da 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -34,13 +34,19 @@ THE SOFTWARE. #include #include -#if __HIP_CLANG_ONLY__ -#if __HIP_VDI__ +#if __HIP_CLANG_ONLY__ && __HIP_VDI__ extern "C" __device__ int printf(const char *fmt, ...); #else +#if HC_FEATURE_PRINTF +template <typename... All> +static inline __device__ void printf(const char* format, All... all) { + hc::printf(format, all...); +} +#else +template <typename... All> static inline __device__ void printf(const char* format, All...
all) {} -#endif -#endif +#endif // HC_FEATURE_PRINTF +#endif // __HIP_CLANG_ONLY__ && __HIP_VDI__ /* Integer Intrinsics diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index c1ad5b2fe5..448331a868 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -311,20 +311,6 @@ extern "C" __device__ void* __hip_free(void* ptr); static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } -// Declare printf only for the HCC compiler. hip-clang is handled in -// device_functions.h -#if __HCC_ACCELERATOR__ -#if HC_FEATURE_PRINTF -template -static inline __device__ void printf(const char* format, All... all) { - hc::printf(format, all...); -} -#else -template -static inline __device__ void printf(const char* format, All... all) {} -#endif // HC_FEATURE_PRINTF -#endif // __HCC_ACCELERATOR__ - #endif //__HCC_OR_HIP_CLANG__ #ifdef __HCC__ From 7c8b8d24ef7cc0f0c69db3ee9c785804ee9317b0 Mon Sep 17 00:00:00 2001 From: zhaozhangjian <38252887+zhaozhangjian@users.noreply.github.com> Date: Tue, 17 Mar 2020 16:35:07 +0800 Subject: [PATCH 024/132] fix a bug when initializing a vector of hipFunction_t (#1949) --- src/hip_clang.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index f17d2673c7..4c8ae07134 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -50,7 +50,7 @@ __hipRegisterFatBinary(const void* data) return nullptr; } - auto modules = new std::vector{g_deviceCnt}; + auto modules = new std::vector(g_deviceCnt); if (!modules) { return nullptr; } @@ -133,7 +133,7 @@ extern "C" void __hipRegisterFunction( int* wSize) { HIP_INIT_API(NONE, modules, hostFunction, deviceFunction, deviceName); - std::vector functions{g_deviceCnt}; + std::vector functions(g_deviceCnt); assert(modules && modules->size() >= g_deviceCnt); for (int deviceId = 0; 
deviceId < g_deviceCnt; ++deviceId) { From 16a6a94fbfcb8339fefdd5339201088b63708158 Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary <51944368+cjatin@users.noreply.github.com> Date: Tue, 17 Mar 2020 14:13:19 +0530 Subject: [PATCH 025/132] Adding Half Abs APIs (#1902) --- include/hip/hcc_detail/hip_fp16.h | 14 ++++++++ include/hip/hcc_detail/hip_fp16_math_fwd.h | 2 ++ tests/src/deviceLib/hipTestHalf.cpp | 39 ++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 52abc1a004..77a7bba60d 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -1268,6 +1268,13 @@ THE SOFTWARE. static_cast<__half_raw>(x).data + static_cast<__half_raw>(y).data}; } + inline + __device__ + __half __habs(__half x) + { + return __half_raw{ + __ocml_fabs_f16(static_cast<__half_raw>(x).data)}; + } inline __device__ __half __hsub(__half x, __half y) @@ -1334,6 +1341,13 @@ THE SOFTWARE. static_cast<__half2_raw>(x).data + static_cast<__half2_raw>(y).data}; } + inline + __device__ + __half2 __habs2(__half2 x) + { + return __half2_raw{ + __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)}; + } inline __device__ __half2 __hsub2(__half2 x, __half2 y) diff --git a/include/hip/hcc_detail/hip_fp16_math_fwd.h b/include/hip/hcc_detail/hip_fp16_math_fwd.h index eeb617c40b..95403e6ca8 100644 --- a/include/hip/hcc_detail/hip_fp16_math_fwd.h +++ b/include/hip/hcc_detail/hip_fp16_math_fwd.h @@ -38,6 +38,7 @@ extern "C" __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); @@ -58,6 +59,7 @@ extern "C" #endif __device__ 
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 751d44e242..64a9f7fa63 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -96,6 +96,18 @@ void kernel_hisinf(__half* input, int* output) { output[tx] = __hisinf(input[tx]); } +__global__ void testHalfAbs(float* p) { + auto a = __float2half(*p); + a = __habs(a); + *p = __half2float(a); +} + +__global__ void testHalf2Abs(float2* p) { + auto a = __float22half2_rn(*p); + a = __habs2(a); + *p = __half22float2(a); +} + #endif @@ -237,6 +249,31 @@ void checkFunctional() { return; } +void checkHalfAbs() { + { + float *p; + hipMalloc(&p, sizeof(float)); + float pp = -2.1f; + hipMemcpy(p, &pp, sizeof(float), hipMemcpyDefault); + hipLaunchKernelGGL(testHalfAbs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float), hipMemcpyDefault); + hipFree(p); + if(pp < 0.0f) { failed("Half Abs failed"); } + } + { + float2 *p; + hipMalloc(&p, sizeof(float2)); + float2 pp; + pp.x = -2.1f; + pp.y = -1.1f; + hipMemcpy(p, &pp, sizeof(float2), hipMemcpyDefault); + hipLaunchKernelGGL(testHalf2Abs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float2), hipMemcpyDefault); + hipFree(p); + if(pp.x < 0.0f || pp.y < 0.0f) { failed("Half2 Abs Test Failed"); } + } +} + int main() { bool* result{nullptr}; hipMemAllocHost((void**)&result, sizeof(result)); @@ -260,5 +297,7 @@ int main() { // run some functional checks checkFunctional(); + checkHalfAbs(); + passed(); } From b4bf6add33aeae622b78104d670c5d0fafdc0dc5 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 17 Mar 2020 18:51:46 +0300 Subject: [PATCH 026/132] [HIPIFY][doc] Update README.md: LLVM 10.0.0-rc4 is supported --- 
hipify-clang/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hipify-clang/README.md b/hipify-clang/README.md index 88d7a72ccd..bfa017def2 100644 --- a/hipify-clang/README.md +++ b/hipify-clang/README.md @@ -42,10 +42,10 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**9.0.1**](http://releases.llvm.org/download.html#9.0.1), the latest release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**9.0.1**](http://releases.llvm.org/download.html#9.0.1), the latest release candidate: [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4). 2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.1 Update 2**](https://developer.nvidia.com/cuda-10.1-download-archive-base). -To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-downloads) please use the latest `LLVM` release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). +To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-downloads) please use the latest `LLVM` release candidate: [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4). 
| **LLVM release version** | **CUDA latest supported version** | **Windows** | **Linux** | |:----------------------------------------------------------:|:------------------------------------------------------------------------:|:-----------:|:---------:| @@ -67,7 +67,7 @@ To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-download | [8.0.1](http://releases.llvm.org/download.html#8.0.1) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.1_bug_38811.zip)*
| + | | [9.0.0](http://releases.llvm.org/download.html#9.0.0) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | | [**9.0.1**](http://releases.llvm.org/download.html#9.0.1) | [**10.1**](https://developer.nvidia.com/cuda-10.1-download-archive-base) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | -| [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3) | [10.2](https://developer.nvidia.com/cuda-downloads) | + | + | +| [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4) | [10.2](https://developer.nvidia.com/cuda-downloads) | + | + | `*` Download the patch and unpack it into your `LLVM` distributive directory; a few header files will be overwritten; rebuilding of `LLVM` is not needed. @@ -158,7 +158,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro **LLVM 10.0.0 or newer:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/archive/llvmorg-10.0.0-rc3.tar.gz) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/archive/llvmorg-10.0.0-rc4.tar.gz) sources; 2. build [`LLVM project`](http://llvm.org/docs/CMake.html): **Linux**: @@ -249,7 +249,7 @@ On Linux the following configurations are tested: Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.5.32 -Ubuntu 16-18: LLVM 8.0.0 - 10.0.0-rc3, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 +Ubuntu 16-18: LLVM 8.0.0 - 10.0.0-rc4, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 Minimum build system requirements for the above configurations: @@ -404,8 +404,8 @@ Testing Time: 3.07s | 7.0.0 - 7.1.0 | 9.2 | 7.6.5.32 | 2017.15.9.11 | 3.13.3 | 3.7.3 | | 8.0.0 - 8.0.1 | 10.0 | 7.6.5.32 | 2017.15.9.15 | 3.14.2 | 3.7.4 | | 9.0.0 - 9.0.1 | 10.1 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.0 | -| 10.0.0-rc1-rc3 | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.1 | -| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.5 | 3.8.2 | +| 10.0.0-rc1-rc4 | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.0 | 3.16.5 | 3.8.2 | +| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.0 | 3.16.5 | 3.8.2 | *Building with testing support on `Windows 10` by `Visual Studio 16 2019`:* From 58058091adfd7e05f81ab6bd237897c1b2f66ecc Mon Sep 17 
00:00:00 2001 From: Paul Fultz II Date: Wed, 18 Mar 2020 00:48:58 -0500 Subject: [PATCH 027/132] Add missing flags for hip::device target on hip-clang (#1230) This adds the missing compilation flags to hip::device so it can compile with hip-clang compiler. --- hip-config-clang.cmake.in | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hip-config-clang.cmake.in b/hip-config-clang.cmake.in index 9680b2ff8e..583444673d 100644 --- a/hip-config-clang.cmake.in +++ b/hip-config-clang.cmake.in @@ -50,9 +50,46 @@ set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") +get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) +get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) +file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) +find_path(HIP_CLANG_INCLUDE_PATH stddef.h + HINTS + ${HIP_CLANG_INCLUDE_SEARCH_PATHS} + NO_DEFAULT_PATH) + find_dependency(amd_comgr) +find_dependency(AMDDeviceLibs) +set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") +set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") + include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) +set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib +) + +set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link +) + +set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" +) + +set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" +) + +foreach(GPU_TARGET ${GPU_TARGETS}) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS
"--cuda-gpu-arch=${GPU_TARGET}" + ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" + ) +endforeach() + set( hip_LIBRARIES hip::host hip::device) set( hip_LIBRARY ${hip_LIBRARIES}) From b5e683a35d80cba27a3387bb5209e22662bbeba2 Mon Sep 17 00:00:00 2001 From: jglaser Date: Wed, 18 Mar 2020 01:50:06 -0400 Subject: [PATCH 028/132] Implement accurate max block size in hipFuncGetAttributes() (#1676) This PR ensures both that the maxThreadsPerBlock returned by hipFuncGetAttributes is a multiple of the warp size and that the register usage of the maximum block does not exceed the number of available registers. Fixes #1662 --- src/hip_module.cpp | 50 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 6ec260b58a..09d177a102 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -1247,7 +1247,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) { return reinterpret_cast<const amd_kernel_code_v3_t*>(kd._header); } -hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) { +hipFuncAttributes make_function_attributes(TlsData *tls, ihipModuleSymbol_t& kd) { hipFuncAttributes r{}; hipDeviceProp_t prop{}; @@ -1257,23 +1257,57 @@ hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_ prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; if (kd._is_code_object_v3) { - r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; - r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; - r.numRegs = ((header_v3(kd)->compute_pgm_rsrc1 & 0x3F) + 1) << 2; r.binaryVersion = 0; // FIXME: should it be the ISA version or code // object format version?
+ r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; + r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; } else { r.localSizeBytes = kd._header->workitem_private_segment_byte_size; r.sharedSizeBytes = kd._header->workgroup_group_segment_byte_size; - r.numRegs = kd._header->workitem_vgpr_count; r.binaryVersion = kd._header->amd_machine_version_major * 10 + kd._header->amd_machine_version_minor; } r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; - r.maxThreadsPerBlock = r.numRegs ? - std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : - prop.maxThreadsPerBlock; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(&kd, &usedVGPRS, &usedSGPRS, &usedLDS); + + r.numRegs = usedVGPRS; + + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerBlock = prop.maxThreadsPerBlock / wavefrontSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + const size_t numSIMD = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / numSIMD; + size_t maxWaves = 0; + for (int i = 0; i < maxWavefrontsPerBlock; i++) { + size_t wavefronts = i + 1; + + if (usedVGPRS > 0) { + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / numSIMD); + size_t vgprs_alu_occupancy = numSIMD * std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS); + + // Calculate blocks occupancy per CU based on VGPR usage + if (vgprs_alu_occupancy < wavefronts) + break; + } + + if (usedSGPRS > 0) { + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = numSIMD * ((usedSGPRS == 0) ? 
maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + if (sgprs_alu_occupancy < wavefronts) + break; + } + maxWaves = wavefronts; + } + + r.maxThreadsPerBlock = maxWaves * wavefrontSize; r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. return r; From 08d9759ebade199512a7d8dec319cfc098a556b1 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 18 Mar 2020 01:50:21 -0400 Subject: [PATCH 029/132] Workaround for libc++ include path for HIP-Clang (#1917) HIP-Clang cuda_wrapper headers require clang include path before standard C++ include path. However libc++ include path requires to be before clang include path. To workaround this, we pass -isystem with the parent directory of clang include path instead of the clang include path itself. --- bin/hipcc | 2 +- include/hip/hcc_detail/hip_runtime.h | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 929fe1cd71..80d28a7b8e 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -207,7 +207,7 @@ if ($HIP_PLATFORM eq "clang") { } else { $HIPCXXFLAGS .= " -std=c++11"; } - $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH"; + $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; if (not $isWindows) { $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 448331a868..0707cc6899 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -501,9 +501,14 @@ hc_get_workitem_absolute_id(int dim) #define __CUDA__ #include <__clang_cuda_math_forward_declares.h> #include <__clang_cuda_complex_builtins.h> -#include -#include -#include +// Workaround for using libc++ with HIP-Clang. +// The following headers requires clang include path before standard C++ include path. 
+// However libc++ include path requires to be before clang include path. +// To workaround this, we pass -isystem with the parent directory of clang include +// path instead of the clang include path itself. +#include +#include +#include #undef __CUDA__ #pragma pop_macro("__CUDA__") #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ From 4acb0ea0381056c91fb9d907b2a6e14062182fec Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Wed, 18 Mar 2020 01:50:43 -0400 Subject: [PATCH 030/132] [HIP] use markers to sync cooperative and normal queues (#1948) --- src/hip_module.cpp | 50 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 09d177a102..1130dec26a 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -574,10 +574,15 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, // create a cooperative accelerated view for launching gws and main kernels hc::accelerator_view coopAV = acc.create_cooperative_view(); - // wait for this stream to finish operations - stream->locked_wait(); - LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); + + // the cooperative queue will wait until this stream completes its operations + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + coopAV.create_blocking_marker(streamCF, hc::accelerator_scope); + } + streamCrit->_av.acquire_locked_hsa_queue(); coopAV.acquire_locked_hsa_queue(); @@ -605,12 +610,18 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, sharedMemBytes, stream, kernelParams, nullptr, nullptr, nullptr, 0, true, impCoopParams, &coopAV); - stream->criticalData().unlock(); - stream->criticalData()._av.release_locked_hsa_queue(); - coopAV.release_locked_hsa_queue(); - // wait on the dispatch on the dedicated cooperative queue to finish - 
coopAV.wait(hc::hcWaitModeActive); + coopAV.release_locked_hsa_queue(); + stream->criticalData()._av.release_locked_hsa_queue(); + + // this stream will wait until the cooperative queue completes its operations + hc::completion_future cooperativeCF; + if (!coopAV.get_is_empty()) { + cooperativeCF = coopAV.create_marker(hc::accelerator_scope); + streamCrit->_av.create_blocking_marker(cooperativeCF, hc::accelerator_scope); + } + + stream->criticalData().unlock(); return result; #else @@ -747,8 +758,14 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // lock all streams before launching the blit kernels for initializing the GWS and main kernels to each device for (int i = 0; i < numDevices; ++i) { - launchParamsList[i].stream->locked_wait(); LockedAccessor_StreamCrit_t streamCrit(launchParamsList[i].stream->criticalData(), false); + + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + coopAVs[i].create_blocking_marker(streamCF, hc::accelerator_scope); + } + streamCrit->_av.acquire_locked_hsa_queue(); coopAVs[i].acquire_locked_hsa_queue(); } @@ -834,14 +851,17 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // unlock all streams for (int i = 0; i < numDevices; ++i) { - launchParamsList[i].stream->criticalData().unlock(); - launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); coopAVs[i].release_locked_hsa_queue(); - } + launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); - // wait on the dispatch on cooperative queues on each device to finish - for (int i = 0; i < numDevices; ++i) { - coopAVs[i].wait(hc::hcWaitModeActive); + hc::completion_future cooperativeCF; + if (!coopAVs[i].get_is_empty()) { + cooperativeCF = coopAVs[i].create_marker(hc::accelerator_scope); + launchParamsList[i].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, 
hc::accelerator_scope); + } + + launchParamsList[i].stream->criticalData().unlock(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); From 7d47cc4150466487038bfc48d5a602ddd004bfc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nagy-Egri=20M=C3=A1t=C3=A9=20Ferenc?= Date: Wed, 18 Mar 2020 06:50:53 +0100 Subject: [PATCH 031/132] Remove debug message (#1950) --- cmake/FindHIP.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 7edf27f3c7..0819a0364c 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -75,7 +75,6 @@ if(UNIX AND NOT APPLE AND NOT CYGWIN) endif() # And push it back to the cache set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) - message("Found HIP at ${HIP_ROOT_DIR}") endif() # Find HIPCC executable From 5f6c8fa5359ddb1ea6c9b51cbaedd3c885bb3fbb Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 18 Mar 2020 13:07:46 +0300 Subject: [PATCH 032/132] [HIPIFY][SPARSE] sync with hipSPARSE + Update doc and hipify-perl accordingly --- bin/hipify-perl | 7 +++++++ docs/markdown/CUSPARSE_API_supported_by_HIP.md | 14 +++++++------- hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp | 8 ++++---- hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp | 6 +++--- .../unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu | 3 +-- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index f6de5abae4..dc039cb8a1 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -759,6 +759,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCgthr\b/hipsparseCgthr/g; $ft{'library'} += s/\bcusparseCgthrz\b/hipsparseCgthrz/g; $ft{'library'} += s/\bcusparseChybmv\b/hipsparseChybmv/g; + $ft{'library'} += s/\bcusparseCnnz\b/hipsparseCnnz/g; $ft{'library'} += s/\bcusparseCreate\b/hipsparseCreate/g; $ft{'library'} += s/\bcusparseCreateCsrgemm2Info\b/hipsparseCreateCsrgemm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrilu02Info\b/hipsparseCreateCsrilu02Info/g; @@ -794,6 +795,7 
@@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDgthr\b/hipsparseDgthr/g; $ft{'library'} += s/\bcusparseDgthrz\b/hipsparseDgthrz/g; $ft{'library'} += s/\bcusparseDhybmv\b/hipsparseDhybmv/g; + $ft{'library'} += s/\bcusparseDnnz\b/hipsparseDnnz/g; $ft{'library'} += s/\bcusparseDroti\b/hipsparseDroti/g; $ft{'library'} += s/\bcusparseDsctr\b/hipsparseDsctr/g; $ft{'library'} += s/\bcusparseGetMatDiagType\b/hipsparseGetMatDiagType/g; @@ -830,6 +832,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseSgthr\b/hipsparseSgthr/g; $ft{'library'} += s/\bcusparseSgthrz\b/hipsparseSgthrz/g; $ft{'library'} += s/\bcusparseShybmv\b/hipsparseShybmv/g; + $ft{'library'} += s/\bcusparseSnnz\b/hipsparseSnnz/g; $ft{'library'} += s/\bcusparseSroti\b/hipsparseSroti/g; $ft{'library'} += s/\bcusparseSsctr\b/hipsparseSsctr/g; $ft{'library'} += s/\bcusparseXbsrilu02_zeroPivot\b/hipsparseXbsrilu02_zeroPivot/g; @@ -868,6 +871,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZgthr\b/hipsparseZgthr/g; $ft{'library'} += s/\bcusparseZgthrz\b/hipsparseZgthrz/g; $ft{'library'} += s/\bcusparseZhybmv\b/hipsparseZhybmv/g; + $ft{'library'} += s/\bcusparseZnnz\b/hipsparseZnnz/g; $ft{'library'} += s/\bcusparseZsctr\b/hipsparseZsctr/g; $ft{'device_library'} += s/\bcurand\b/hiprand/g; $ft{'device_library'} += s/\bcurand_discrete\b/hiprand_discrete/g; @@ -1124,6 +1128,7 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcurandStatus_t\b/hiprandStatus_t/g; $ft{'type'} += s/\bcusparseAction_t\b/hipsparseAction_t/g; $ft{'type'} += s/\bcusparseDiagType_t\b/hipsparseDiagType_t/g; + $ft{'type'} += s/\bcusparseDirection_t\b/hipsparseDirection_t/g; $ft{'type'} += s/\bcusparseFillMode_t\b/hipsparseFillMode_t/g; $ft{'type'} += s/\bcusparseHandle_t\b/hipsparseHandle_t/g; $ft{'type'} += s/\bcusparseHybMat_t\b/hipsparseHybMat_t/g; @@ -1392,6 +1397,8 @@ sub simpleSubstitutions { $ft{'numeric_literal'} += s/\bCUSPARSE_ACTION_SYMBOLIC\b/HIPSPARSE_ACTION_SYMBOLIC/g; $ft{'numeric_literal'} += 
s/\bCUSPARSE_DIAG_TYPE_NON_UNIT\b/HIPSPARSE_DIAG_TYPE_NON_UNIT/g; $ft{'numeric_literal'} += s/\bCUSPARSE_DIAG_TYPE_UNIT\b/HIPSPARSE_DIAG_TYPE_UNIT/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_COLUMN\b/HIPSPARSE_DIRECTION_COLUMN/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_ROW\b/HIPSPARSE_DIRECTION_ROW/g; $ft{'numeric_literal'} += s/\bCUSPARSE_FILL_MODE_LOWER\b/HIPSPARSE_FILL_MODE_LOWER/g; $ft{'numeric_literal'} += s/\bCUSPARSE_FILL_MODE_UPPER\b/HIPSPARSE_FILL_MODE_UPPER/g; $ft{'numeric_literal'} += s/\bCUSPARSE_HYB_PARTITION_AUTO\b/HIPSPARSE_HYB_PARTITION_AUTO/g; diff --git a/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/docs/markdown/CUSPARSE_API_supported_by_HIP.md index fc7a8ee8cd..939c631d67 100644 --- a/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -12,9 +12,9 @@ | enum |***`cusparseAction_t`*** | |***`hipsparseAction_t`*** | | 0 |*`CUSPARSE_ACTION_SYMBOLIC`* | |*`HIPSPARSE_ACTION_SYMBOLIC`* | | 1 |*`CUSPARSE_ACTION_NUMERIC`* | |*`HIPSPARSE_ACTION_NUMERIC`* | -| enum |***`cusparseDirection_t`*** | | | -| 0 |*`CUSPARSE_DIRECTION_ROW`* | | | -| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | | | +| enum |***`cusparseDirection_t`*** | |***`hipsparseDirection_t`*** | +| 0 |*`CUSPARSE_DIRECTION_ROW`* | |*`HIPSPARSE_DIRECTION_ROW`* | +| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | |*`HIPSPARSE_DIRECTION_COLUMN`* | | enum |***`cusparseHybPartition_t`*** | |***`hipsparseHybPartition_t`*** | | 0 |*`CUSPARSE_HYB_PARTITION_AUTO`* | |*`HIPSPARSE_HYB_PARTITION_AUTO`* | | 1 |*`CUSPARSE_HYB_PARTITION_USER`* | |*`HIPSPARSE_HYB_PARTITION_USER`* | @@ -662,10 +662,10 @@ |`cusparseDhyb2dense` | | |`cusparseChyb2dense` | | |`cusparseZhyb2dense` | | -|`cusparseSnnz` | | -|`cusparseDnnz` | | -|`cusparseCnnz` | | -|`cusparseZnnz` | | +|`cusparseSnnz` |`hipsparseSnnz` | +|`cusparseDnnz` |`hipsparseDnnz` | +|`cusparseCnnz` |`hipsparseCnnz` | +|`cusparseZnnz` |`hipsparseZnnz` | |`cusparseCreateIdentityPermutation` 
|`hipsparseCreateIdentityPermutation` | |`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` | |`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` | diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index 1df1bb9cba..d5d32cd0ff 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -619,10 +619,10 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseChyb2dense", {"hipsparseChyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZhyb2dense", {"hipsparseZhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE}}, diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp index 1d3fe28c62..8cebef51d4 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp @@ -88,9 +88,9 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP{ {"CUSPARSE_ACTION_SYMBOLIC", {"HIPSPARSE_ACTION_SYMBOLIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, {"CUSPARSE_ACTION_NUMERIC", {"HIPSPARSE_ACTION_NUMERIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"cusparseDirection_t", {"hipsparseDirection_t", "", CONV_TYPE, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDirection_t", {"hipsparseDirection_t", "", CONV_TYPE, API_SPARSE}}, + {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, + {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, {"cusparseHybPartition_t", {"hipsparseHybPartition_t", "", CONV_TYPE, API_SPARSE}}, {"CUSPARSE_HYB_PARTITION_AUTO", {"HIPSPARSE_HYB_PARTITION_AUTO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu index c6d62c0007..e6a2178053 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu +++ b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu @@ -385,8 +385,7 @@ double compute_BSR(BCRSArrays& bcsr, double *x , double *y){ cudaEventCreate(&startTime); cudaEventCreate(&stopTime); cudaEventRecord(startTime, bcsr.streamId); - // NOTE: cusparseDbsrmv and CUSPARSE_DIRECTION_COLUMN (of type cusparseDirection_t) are yet unsupported by HIP - // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, + // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, HIPSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, CUSPARSE_OPERATION_NON_TRANSPOSE, bcsr.nbBlockRow, bcsr.m, bcsr.nbBlocks, &alpha, descr, bcsr.cu_bsrValC, bcsr.cu_bsrRowPtrC, bcsr.cu_bsrColIndC, bcsr.blockSize, From 234eae9e926b5f2c49f5c637d3ad7678934e35e0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 18 Mar 2020 13:44:50 +0300 Subject: [PATCH 033/132] [HIPIFY][SPARSE] sync with hipSPARSE's "added 
csrsm2" (#83) + https://github.com/ROCmSoftwarePlatform/hipSPARSE/pull/83 + Update doc and hipify-perl accordingly --- bin/hipify-perl | 17 ++++++++++ .../markdown/CUSPARSE_API_supported_by_HIP.md | 34 +++++++++---------- .../src/CUDA2HIP_SPARSE_API_functions.cpp | 30 ++++++++-------- .../src/CUDA2HIP_SPARSE_API_types.cpp | 4 +-- 4 files changed, 51 insertions(+), 34 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index dc039cb8a1..d178c87729 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -750,6 +750,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCcsrmm\b/hipsparseCcsrmm/g; $ft{'library'} += s/\bcusparseCcsrmm2\b/hipsparseCcsrmm2/g; $ft{'library'} += s/\bcusparseCcsrmv\b/hipsparseCcsrmv/g; + $ft{'library'} += s/\bcusparseCcsrsm2_analysis\b/hipsparseCcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseCcsrsm2_bufferSizeExt\b/hipsparseCcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseCcsrsm_solve\b/hipsparseCcsrsm_solve/g; $ft{'library'} += s/\bcusparseCcsrsv2_analysis\b/hipsparseCcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSize\b/hipsparseCcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSizeExt\b/hipsparseCcsrsv2_bufferSizeExt/g; @@ -763,6 +766,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCreate\b/hipsparseCreate/g; $ft{'library'} += s/\bcusparseCreateCsrgemm2Info\b/hipsparseCreateCsrgemm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrilu02Info\b/hipsparseCreateCsrilu02Info/g; + $ft{'library'} += s/\bcusparseCreateCsrsm2Info\b/hipsparseCreateCsrsm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrsv2Info\b/hipsparseCreateCsrsv2Info/g; $ft{'library'} += s/\bcusparseCreateHybMat\b/hipsparseCreateHybMat/g; $ft{'library'} += s/\bcusparseCreateIdentityPermutation\b/hipsparseCreateIdentityPermutation/g; @@ -781,6 +785,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDcsrmm\b/hipsparseDcsrmm/g; $ft{'library'} += s/\bcusparseDcsrmm2\b/hipsparseDcsrmm2/g; $ft{'library'} += 
s/\bcusparseDcsrmv\b/hipsparseDcsrmv/g; + $ft{'library'} += s/\bcusparseDcsrsm2_analysis\b/hipsparseDcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseDcsrsm2_bufferSizeExt\b/hipsparseDcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseDcsrsm_solve\b/hipsparseDcsrsm_solve/g; $ft{'library'} += s/\bcusparseDcsrsv2_analysis\b/hipsparseDcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSize\b/hipsparseDcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSizeExt\b/hipsparseDcsrsv2_bufferSizeExt/g; @@ -789,6 +796,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDestroy\b/hipsparseDestroy/g; $ft{'library'} += s/\bcusparseDestroyCsrgemm2Info\b/hipsparseDestroyCsrgemm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrilu02Info\b/hipsparseDestroyCsrilu02Info/g; + $ft{'library'} += s/\bcusparseDestroyCsrsm2Info\b/hipsparseDestroyCsrsm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrsv2Info\b/hipsparseDestroyCsrsv2Info/g; $ft{'library'} += s/\bcusparseDestroyHybMat\b/hipsparseDestroyHybMat/g; $ft{'library'} += s/\bcusparseDestroyMatDescr\b/hipsparseDestroyMatDescr/g; @@ -818,6 +826,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseScsrmm\b/hipsparseScsrmm/g; $ft{'library'} += s/\bcusparseScsrmm2\b/hipsparseScsrmm2/g; $ft{'library'} += s/\bcusparseScsrmv\b/hipsparseScsrmv/g; + $ft{'library'} += s/\bcusparseScsrsm2_analysis\b/hipsparseScsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseScsrsm2_bufferSizeExt\b/hipsparseScsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseScsrsm_solve\b/hipsparseScsrsm_solve/g; $ft{'library'} += s/\bcusparseScsrsv2_analysis\b/hipsparseScsrsv2_analysis/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSize\b/hipsparseScsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSizeExt\b/hipsparseScsrsv2_bufferSizeExt/g; @@ -846,6 +857,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseXcsrgemm2Nnz\b/hipsparseXcsrgemm2Nnz/g; $ft{'library'} += 
s/\bcusparseXcsrgemmNnz\b/hipsparseXcsrgemmNnz/g; $ft{'library'} += s/\bcusparseXcsrilu02_zeroPivot\b/hipsparseXcsrilu02_zeroPivot/g; + $ft{'library'} += s/\bcusparseXcsrsm2_zeroPivot\b/hipsparseXcsrsm2_zeroPivot/g; $ft{'library'} += s/\bcusparseXcsrsort\b/hipsparseXcsrsort/g; $ft{'library'} += s/\bcusparseXcsrsort_bufferSizeExt\b/hipsparseXcsrsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsrsv2_zeroPivot\b/hipsparseXcsrsv2_zeroPivot/g; @@ -862,6 +874,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZcsrmm\b/hipsparseZcsrmm/g; $ft{'library'} += s/\bcusparseZcsrmm2\b/hipsparseZcsrmm2/g; $ft{'library'} += s/\bcusparseZcsrmv\b/hipsparseZcsrmv/g; + $ft{'library'} += s/\bcusparseZcsrsm2_analysis\b/hipsparseZcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseZcsrsm2_bufferSizeExt\b/hipsparseZcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseZcsrsm_solve\b/hipsparseZcsrsm_solve/g; $ft{'library'} += s/\bcusparseZcsrsv2_analysis\b/hipsparseZcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSize\b/hipsparseZcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSizeExt\b/hipsparseZcsrsv2_bufferSizeExt/g; @@ -995,6 +1010,8 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcsrgemm2Info\b/csrgemm2Info/g; $ft{'type'} += s/\bcsrgemm2Info_t\b/csrgemm2Info_t/g; $ft{'type'} += s/\bcsrilu02Info_t\b/csrilu02Info_t/g; + $ft{'type'} += s/\bcsrsm2Info\b/csrsm2Info/g; + $ft{'type'} += s/\bcsrsm2Info_t\b/csrsm2Info_t/g; $ft{'type'} += s/\bcsrsv2Info_t\b/csrsv2Info_t/g; $ft{'type'} += s/\bcuComplex\b/hipComplex/g; $ft{'type'} += s/\bcuDoubleComplex\b/hipDoubleComplex/g; diff --git a/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/docs/markdown/CUSPARSE_API_supported_by_HIP.md index 939c631d67..d23b06d307 100644 --- a/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -69,8 +69,8 @@ | typedef |`cusparseSolveAnalysisInfo_t` | | | | struct |`csrsv2Info` | | | | typedef |`csrsv2Info_t` | 
|`csrsv2Info_t` | -| struct |`csrsm2Info` | 9.2 | | -| typedef |`csrsm2Info_t` | | | +| struct |`csrsm2Info` | 9.2 |`csrsm2Info` | +| typedef |`csrsm2Info_t` | |`csrsm2Info_t` | | struct |`bsrsv2Info` | | | | typedef |`bsrsv2Info_t` | | | | struct |`bsrsm2Info` | | | @@ -151,8 +151,8 @@ |`cusparseGetStream` |`hipsparseGetStream` | 8.0 | |`cusparseCreateCsrsv2Info` |`hipsparseCreateCsrsv2Info` | |`cusparseDestroyCsrsv2Info` |`hipsparseDestroyCsrsv2Info` | -|`cusparseCreateCsrsm2Info` | | 9.2 | -|`cusparseDestroyCsrsm2Info` | | 9.2 | +|`cusparseCreateCsrsm2Info` |`hipsparseCreateCsrsm2Info` | 9.2 | +|`cusparseDestroyCsrsm2Info` |`hipsparseDestroyCsrsm2Info` | 9.2 | |`cusparseCreateCsric02Info` | | |`cusparseDestroyCsric02Info` | | |`cusparseCreateCsrilu02Info` |`hipsparseCreateCsrilu02Info` | @@ -306,19 +306,19 @@ |`cusparseDcsrsm_solve` | | |`cusparseCcsrsm_solve` | | |`cusparseZcsrsm_solve` | | -|`cusparseScsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseDcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseCcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseZcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseScsrsm2_analysis` | | 9.2 | -|`cusparseDcsrsm2_analysis` | | 9.2 | -|`cusparseCcsrsm2_analysis` | | 9.2 | -|`cusparseZcsrsm2_analysis` | | 9.2 | -|`cusparseScsrsm2_solve` | | 9.2 | -|`cusparseDcsrsm2_solve` | | 9.2 | -|`cusparseCcsrsm2_solve` | | 9.2 | -|`cusparseZcsrsm2_solve` | | 9.2 | -|`cusparseXcsrsm2_zeroPivot` | | 9.2 | +|`cusparseScsrsm2_bufferSizeExt` |`hipsparseScsrsm2_bufferSizeExt` | 9.2 | +|`cusparseDcsrsm2_bufferSizeExt` |`hipsparseDcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseCcsrsm2_bufferSizeExt` |`hipsparseCcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseZcsrsm2_bufferSizeExt` |`hipsparseZcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseScsrsm2_analysis` |`hipsparseScsrsm2_analysis` | 9.2 | +|`cusparseDcsrsm2_analysis` |`hipsparseDcsrsm2_analysis` | 9.2 | +|`cusparseCcsrsm2_analysis` |`hipsparseCcsrsm2_analysis` | 9.2 | +|`cusparseZcsrsm2_analysis` |`hipsparseZcsrsm2_analysis` | 9.2 | 
+|`cusparseScsrsm2_solve` |`hipsparseScsrsm2_solve` | 9.2 | +|`cusparseDcsrsm2_solve` |`hipsparseDcsrsm2_solve` | 9.2 | +|`cusparseCcsrsm2_solve` |`hipsparseCcsrsm2_solve` | 9.2 | +|`cusparseZcsrsm2_solve` |`hipsparseZcsrsm2_solve` | 9.2 | +|`cusparseXcsrsm2_zeroPivot` |`hipsparseXcsrsm2_zeroPivot` | 9.2 | |`cusparseSbsrmm` | | |`cusparseDbsrmm` | | |`cusparseCbsrmm` | | diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index d5d32cd0ff..0f3997145e 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -49,8 +49,8 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseGetStream", {"hipsparseGetStream", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, @@ -218,27 +218,27 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseCcsrsm_analysis", {"hipsparseCcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZcsrsm_analysis", {"hipsparseZcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_bufferSizeExt", {"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE,}}, + {"cusparseCcsrsm2_bufferSizeExt", {"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseScsrsm2_solve", {"hipsparseScsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrsm2_solve", {"hipsparseDcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCcsrsm2_solve", {"hipsparseCcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZcsrsm2_solve", {"hipsparseZcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseSbsrmm", {"hipsparseSbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDbsrmm", {"hipsparseDbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp index 8cebef51d4..aae85a50d3 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp @@ -41,8 +41,8 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP{ {"csrsv2Info", {"csrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, {"csrsv2Info_t", {"csrsv2Info_t", "", CONV_TYPE, API_SPARSE}}, - {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrsm2Info_t", {"csrsm2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, + {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE}}, + {"csrsm2Info_t", {"csrsm2Info_t", "", 
CONV_TYPE, API_SPARSE}}, {"bsrsv2Info", {"bsrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, {"bsrsv2Info_t", {"bsrsv2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, From c9672b8de5a0403917572c3d80344116f7d0b8b9 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 18 Mar 2020 14:24:02 +0300 Subject: [PATCH 034/132] [HIPIFY][perl] Fix missed sync with hipify-clang --- bin/hipify-perl | 4 ++-- hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index d178c87729..9b170ccb14 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -337,8 +337,8 @@ sub simpleSubstitutions { $ft{'execution'} += s/\bcudaLaunchCooperativeKernelMultiDevice\b/hipLaunchCooperativeKernelMultiDevice/g; $ft{'execution'} += s/\bcudaLaunchKernel\b/hipLaunchKernel/g; $ft{'execution'} += s/\bcudaSetupArgument\b/hipSetupArgument/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessor/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; $ft{'occupancy'} += s/\bcuOccupancyMaxPotentialBlockSize\b/hipOccupancyMaxPotentialBlockSize/g; $ft{'occupancy'} += s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; $ft{'occupancy'} += s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index ab07a10e93..7be0fd0f3d 100644 --- 
a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -545,9 +545,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ // 5.21. Occupancy // cudaOccupancyMaxActiveBlocksPerMultiprocessor - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags","", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSize {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags From b757c2a891e03a7a2d3fe9ff25f5460c158fbdcb Mon Sep 17 00:00:00 2001 From: nelsonc-amd <44706806+nelsonc-amd@users.noreply.github.com> Date: Thu, 19 Mar 2020 03:45:09 -0700 Subject: [PATCH 035/132] packaging: change Provides to use dash instead of underscore (#1913) Addresses an installation problem for several HIP packages. Packages builds have been tested locally, validation in progress. 
--- packaging/hip-base.txt | 3 +-- packaging/hip-doc.txt | 3 +-- packaging/hip-hcc.txt | 3 +-- packaging/hip-nvcc.txt | 3 +-- packaging/hip-samples.txt | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/packaging/hip-base.txt b/packaging/hip-base.txt index 221ff98608..fc8becf84f 100644 --- a/packaging/hip-base.txt +++ b/packaging/hip-base.txt @@ -26,9 +26,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_base") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-base") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_base") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_base") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-doc.txt b/packaging/hip-doc.txt index d97ddc7d3a..41db246d31 100644 --- a/packaging/hip-doc.txt +++ b/packaging/hip-doc.txt @@ -32,9 +32,8 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_doc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-doc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_doc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_doc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/packaging/hip-hcc.txt b/packaging/hip-hcc.txt index 21e138e1ed..6a04ebffbd 100644 --- a/packaging/hip-hcc.txt +++ b/packaging/hip-hcc.txt @@ -37,9 +37,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") 
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), comgr (>= 1.1)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_hcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-hcc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_hcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_hcc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-nvcc.txt b/packaging/hip-nvcc.txt index dc36b628c7..4b11939609 100644 --- a/packaging/hip-nvcc.txt +++ b/packaging/hip-nvcc.txt @@ -19,9 +19,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), cuda (>= 7.5)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_nvcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-nvcc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_nvcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_nvcc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-samples.txt b/packaging/hip-samples.txt index aa8ff42821..b3d8bfa807 100644 --- a/packaging/hip-samples.txt +++ b/packaging/hip-samples.txt @@ -20,9 +20,8 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_samples") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-samples") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_samples") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_samples") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") 
From 01d661b159ef53a03ac864c2a526a254c1f0b602 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 19 Mar 2020 03:46:04 -0700 Subject: [PATCH 036/132] fix hipStreamAddCallback, block future work on stream (#1934) --- src/hip_hcc.cpp | 14 - src/hip_hcc_internal.h | 14 - src/hip_stream.cpp | 38 +- .../runtimeApi/stream/StreamAddCallback.cpp | 145 +++++++ .../stream/hipStreamAddCallbackCatch.cpp | 409 ++++++++++++++++++ 5 files changed, 587 insertions(+), 33 deletions(-) create mode 100644 tests/src/runtimeApi/stream/StreamAddCallback.cpp create mode 100644 tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 3f7128e964..807dcc7391 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1520,20 +1520,6 @@ hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) { return e; } -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) { - hipError_t e = hipSuccess; - - // Synchronize stream - tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n", - ToString(cb->_stream).c_str()); - GET_TLS(); - e = ihipStreamSynchronize(tls, cb->_stream); - - // Call registered callback function - cb->_callback(cb->_stream, e, cb->_userData); - delete cb; -} - //--- // Get the stream to use for a command submission. 
// diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index ac63f49dba..c7ff27c7b5 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -654,19 +654,6 @@ class ihipStream_t { }; -//---- -// Internal structure for stream callback handler -class ihipStreamCallback_t { - public: - ihipStreamCallback_t(hipStream_t stream, hipStreamCallback_t callback, void* userData) - : _stream(stream), _callback(callback), _userData(userData) { - }; - hipStream_t _stream; - hipStreamCallback_t _callback; - void* _userData; -}; - - //---- // Internal event structure: enum hipEventStatus_t { @@ -980,7 +967,6 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb); // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) { diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 2add6a77c4..63551d1204 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -257,11 +257,39 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - hipError_t e = hipSuccess; - // Create a thread in detached mode to handle callback - ihipStreamCallback_t* cb = new ihipStreamCallback_t(stream, callback, userData); - std::thread(ihipStreamCallbackHandler, cb).detach(); + auto stream_original{stream}; + stream = ihipSyncAndResolveStream(stream); - return ihipLogStatus(e); + if (!stream) return hipErrorInvalidValue; + + LockedAccessor_StreamCrit_t cs{stream->criticalData()}; + + // create first marker + auto cf = cs->_av.create_marker(hc::no_scope); + // get its signal + auto signal = 
*reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will call the user's callback, then sets first packet's signal to 0 to indicate completion + auto t{new std::function{[=]() { + callback(stream_original, hipSuccess, userData); + hsa_signal_store_relaxed(signal, 0); + }}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + + // create additional marker that blocks on the first one + cs->_av.create_blocking_marker(cf, hc::no_scope); + + return ihipLogStatus(hipSuccess); } diff --git a/tests/src/runtimeApi/stream/StreamAddCallback.cpp b/tests/src/runtimeApi/stream/StreamAddCallback.cpp new file mode 100644 index 0000000000..e6492c7ce2 --- /dev/null +++ b/tests/src/runtimeApi/stream/StreamAddCallback.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include "test_common.h" +#include + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +enum class ExecState +{ + EXEC_NOT_STARTED, + EXEC_STARTED, + EXEC_CB_STARTED, + EXEC_CB_FINISHED, + EXEC_FINISHED +}; + +struct UserData +{ + size_t size; + int* ptr; +}; + +// Global variable to check exection order +std::atomic gData(ExecState::EXEC_NOT_STARTED); + + +void myCallback(hipStream_t stream, hipError_t status, void* user_data) +{ + if(gData.load() != ExecState::EXEC_STARTED) + return; // Error hence return early + + gData.store(ExecState::EXEC_CB_STARTED); + + UserData* data = reinterpret_cast(user_data); + printf("Callback started\n"); + + sleep(1); + + printf("Callback ending.\n"); + gData.store(ExecState::EXEC_CB_FINISHED); +} + 
+bool test(int count) +{ + printf("\n============ Test iteration %d =============\n",count); + // Stream + hipStream_t stream; + bool result = true; + + gData.store(ExecState::EXEC_STARTED); + + HIPCHECK(hipStreamCreate(&stream)); + + // Array size + size_t size = 10000; + + // Device array + int *data = NULL; + HIPCHECK(hipMalloc((void**)&data, sizeof(int) * size)); + + // Initialize device array to -1 + HIPCHECK(hipMemset(data, -1, sizeof(int) * size)); + + // Host array + int *host = NULL; + HIPCHECK(hipHostMalloc((void**)&host, sizeof(int) * size)); + + // Print host ptr address + printf("In main thread\n"); + + // Initialize user_data for callback + UserData arg; + arg.size = size; + arg.ptr = host; + + // Synchronize device + HIPCHECK(hipDeviceSynchronize()); + + // Asynchronous copy from device to host + HIPCHECK(hipMemcpyAsync(host, data, sizeof(int) * size, hipMemcpyDeviceToHost, stream)); + + // Asynchronous memset on device + HIPCHECK(hipMemsetAsync(data, 0, sizeof(int) * size, stream)); + + // Add callback - should happen after hipMemsetAsync() + HIPCHECK(hipStreamAddCallback(stream, myCallback, &arg, 0)); + + printf("Will wait in main thread until callback completes\n"); + + //This should synchronize the stream (including the callback) + HIPCHECK(hipStreamSynchronize(stream)); + + if(gData.load() != ExecState::EXEC_CB_FINISHED) + { + std::cout<<"Callback is not finished\n"; + return false; + } + printf("Callback completed will resume main thread execution\n"); + + if(host[size/2] != -1) + { + // Print some host data that just got copied + printf("Pseudo host data printing (should be -1): %d\n", host[size/2]); + result = false; + } + + HIPCHECK(hipMemcpy(host, data, sizeof(int)*size, hipMemcpyDeviceToHost)); + + if(host[size-1] != 0) + { + printf("Pseudo host data printing (should be 0): %d\n", host[size-1]); + result = false; + } + + HIPCHECK(hipFree(data)); + HIPCHECK(hipHostFree(host)); + HIPCHECK(hipStreamDestroy(stream)); + + 
gData.store(ExecState::EXEC_FINISHED); + return result; +} + +int main() +{ + // Test involves multithreading hence running multiple times + // to make sure consitency in the behavior + bool status = true; + + for(int i=0; i < 10; i++){ + status = test(i+1); + if(status == false) + { + failed("Test Failed!\n"); + break; + } + } + + if(status == true) passed(); + return 0; +} diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp new file mode 100644 index 0000000000..5f267bba28 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -0,0 +1,409 @@ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#define WORKAROUND 0 // Enable (1) this to make stream thread-safe by a workaround + +template // = queue blocks, until task is finished in enqueue(queue,task) +class QueueHipRt; + +// Queue types used in the tests +using TestQueues = std::tuple, QueueHipRt>; + + +// --- Implementation + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) +#define HIP_ASSERT_IGNORE(x,ign) auto err=x; HIP_ASSERT(err==ign ? 
hipSuccess : err) + +#ifdef __HIP_PLATFORM_HCC__ + #define HIPRT_CB +#endif + +template +static auto currentThreadWaitFor(QueueHipRt const & queue) -> void; + +template +class QueueHipRt +{ +public: + static constexpr bool isBlocking = IsBlocking; + //----------------------------------------------------------------------------- + QueueHipRt( + int dev) : + m_dev(dev), + m_HipQueue() + { + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamCreateWithFlags( + &m_HipQueue, + hipStreamNonBlocking)); + } + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt const &) = delete; + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt &&) = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt const &) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt &&) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + ~QueueHipRt() + { + if(isBlocking) { +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // we are a non-blocking queue, so we have to wait here with its destruction until all spawned tasks have been processed + currentThreadWaitFor(*this); +#endif + } + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamDestroy( + m_HipQueue)); + } + +public: + int m_dev; //!< The device this queue is bound to. 
+ hipStream_t m_HipQueue; + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + int m_callees = 0; + std::mutex m_mutex; +#endif +}; + +template +struct Enqueue +{ + //############################################################################# + enum class CallbackState + { + enqueued, + notified, + finished, + }; + + //############################################################################# + struct CallbackSynchronizationData : public std::enable_shared_from_this + { + std::mutex m_mutex; + std::condition_variable m_event; + CallbackState state = CallbackState::enqueued; + }; + + //----------------------------------------------------------------------------- + static void HIPRT_CB hipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) + { + // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. + const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); + + // Notify the executing thread. + { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + pCallbackSynchronizationData->state = CallbackState::notified; + } + pCallbackSynchronizationData->m_event.notify_one(); + + // Wait for the executing thread to finish the task if it has not already finished. 
+ std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::finished) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::finished; + } + ); + } + } + + //----------------------------------------------------------------------------- + template + static auto enqueue( + QueueHipRt & queue, + TTask const & task) + -> void + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + { + // thread-safe callee incrementing + std::lock_guard guard(queue.m_mutex); + queue.m_callees += 1; + } +#endif + auto pCallbackSynchronizationData = std::make_shared(); + // test example: https://github.com/ROCm-Developer-Tools/HIP/blob/roc-1.9.x/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp + HIP_ASSERT(hipStreamAddCallback( + queue.m_HipQueue, + hipRtCallback, + pCallbackSynchronizationData.get(), + 0u)); + + // We start a new std::thread which stores the task to be executed. + // This circumvents the limitation that it is not possible to call HIP methods within the HIP callback thread. + // The HIP thread signals the std::thread when it is ready to execute the task. + // The HIP thread is waiting for the std::thread to signal that it is finished executing the task + // before it executes the next task in the queue (HIP stream). + std::thread t( + [pCallbackSynchronizationData, + task +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + ,&queue // requires queue's destructor to wait for all tasks +#endif + ](){ + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // thread-safe task execution and callee decrementing + std::lock_guard guard(queue.m_mutex); +#endif + + // If the callback has not yet been called, we wait for it. 
+ { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::notified) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::notified; + } + ); + } + + task(); + + // Notify the waiting HIP thread. + pCallbackSynchronizationData->state = CallbackState::finished; + } + pCallbackSynchronizationData->m_event.notify_one(); +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + queue.m_callees -= 1; +#endif + } + ); + if(isBlocking) + t.join(); // => waiting for task completion + else + t.detach(); // => do not wait for task completion + } +}; +//############################################################################# +//! The HIP RT non-blocking queue test trait specialization. +struct Empty +{ + //----------------------------------------------------------------------------- + template + static auto empty( + QueueHipRt const & queue) + -> bool + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + return (queue.m_callees==0); +#else + + // Query is allowed even for queues on non current device. + hipError_t ret = hipSuccess; + HIP_ASSERT_IGNORE( + ret = hipStreamQuery( + queue.m_HipQueue), + hipErrorNotReady); + return (ret == hipSuccess); +#endif + } +}; + +template +auto currentThreadWaitFor(QueueHipRt const & queue) -> void +{ +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + while(queue.m_callees>0) { + std::this_thread::sleep_for(std::chrono::milliseconds(10u)); + } +#else + // Sync is allowed even for queues on non current device. 
+ HIP_ASSERT( hipStreamSynchronize( + queue.m_HipQueue)); +#endif +} + + + + +// --- Tests + +#define TEMPLATE_LIST_TEST_CASE(TestName) \ +template static void TestName (std::atomic &check); \ +static int TestName##Runner () { \ + std::atomic check{0}; \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + return check.load(); \ +} \ +template static void TestName (std::atomic &check) + +// add 1 if a check fails +#define CHECK(result) do{int arg=(!(result)); fprintf(stderr, "Checking " #result " %d\n", arg); check.fetch_add(arg);}while(false) + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueIsInitiallyEmpty ) +{ + TestType queue{0}; + CHECK(Empty::empty(queue)); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueCallbackIsWorking ) +{ + std::promise promise; + auto task = [&](){ promise.set_value(true); }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + CHECK(promise.get_future().get()); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueWaitShouldWork ) +{ + bool CallbackFinished = false; + auto task = + [&CallbackFinished]() noexcept + { + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + currentThreadWaitFor(queue); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinished ) +{ + bool CallbackFinished = false; + TestType queue{0}; + auto task = [&queue, 
&CallbackFinished, &check]() noexcept + { + CHECK(!Empty::empty(queue)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + // A non-blocking queue will always stay empty because the task has been executed immediately. + if(!TestType::isBlocking) + { + currentThreadWaitFor(queue); + } + + CHECK(Empty::empty(queue)); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotExecuteTasksInParallel ) +{ + std::atomic taskIsExecuting(false); + std::promise firstTaskFinished; + std::future firstTaskFinishedFuture = firstTaskFinished.get_future(); + std::promise secondTaskFinished; + std::future secondTaskFinishedFuture = secondTaskFinished.get_future(); + + TestType queue{0}; + + std::thread thread1( + [&queue, &taskIsExecuting, &firstTaskFinished, &check]() + { + auto task1 = [&taskIsExecuting, &firstTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + firstTaskFinished.set_value(); + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task1 + ); + }); + + std::thread thread2( + [&queue, &taskIsExecuting, &secondTaskFinished, &check]() + { + auto task2 = [&taskIsExecuting, &secondTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + secondTaskFinished.set_value(); + }; + + Enqueue enqueue; + enqueue.enqueue( + queue, + task2 + ); + }); + + // Both tasks have to be enqueued + thread1.join(); + thread2.join(); + + currentThreadWaitFor(queue); + + firstTaskFinishedFuture.get(); + secondTaskFinishedFuture.get(); +} + +#define TESTER(name) do { \ + int result = name (); \ + fprintf(stderr, #name " %s\n", 
result?"Errors":"No Errors"); \ + if (result) { failed(#name " failed\n"); } \ +} while (false) + +int main() +{ + TESTER(queueIsInitiallyEmptyRunner); + TESTER(queueCallbackIsWorkingRunner); + TESTER(queueWaitShouldWorkRunner); + TESTER(queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinishedRunner); + TESTER(queueShouldNotExecuteTasksInParallelRunner); + passed(); +} From 28db1e4383878db51afbb614e6b6451983a157a4 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 19 Mar 2020 06:46:31 -0400 Subject: [PATCH 037/132] Let hipcc treat .c program as C program (#1945) nvcc treats .c program as C program and .cpp program as C++ program. Currently hipcc treats .c and .cpp programs as HIP programs. It is desirable to let hipcc behave like nvcc. Currently it is not feasible to let hipcc treat .cpp programs as C++ programs since too many HIP applications use .cpp as the extension for HIP programs. However, we should be able to let hipcc treat .c programs as C programs since few applications use .c as the extension for HIP programs. 
--- bin/hipcc | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 80d28a7b8e..38be6fb5ef 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -134,6 +134,7 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { $HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH } $HIPCXXFLAGS .= "-D__HIP_VDI__"; + $HIPCFLAGS .= "-D__HIP_VDI__"; } if (defined $HIP_VDI_HOME) { @@ -208,6 +209,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -std=c++11"; } $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; + $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; if (not $isWindows) { $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; @@ -223,6 +225,7 @@ if ($HIP_PLATFORM eq "clang") { if ($HIP_RUNTIME eq "HCC" ) { $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; } else { $HIPCXXFLAGS .= " -fhip-new-launch-api"; } @@ -280,8 +283,11 @@ if ($HIP_PLATFORM eq "clang") { } $HIPCXXFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda"; + $HIPCFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; $HIPCXXFLAGS .= " -Wno-deprecated-register"; + $HIPCFLAGS .= " -Wno-deprecated-register"; $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; @@ -305,6 +311,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCC="$CUDA_PATH/bin/nvcc"; $HIPCXXFLAGS .= " -Wno-deprecated-gpu-targets "; $HIPCXXFLAGS .= " -isystem $CUDA_PATH/include"; + $HIPCFLAGS .= " -isystem $CUDA_PATH/include"; $HIPLDFLAGS = " -Wno-deprecated-gpu-targets -lcuda -lcudart -L$CUDA_PATH/lib64"; } else { @@ -314,11 +321,14 @@ if ($HIP_PLATFORM eq "clang") { # Add paths to common HIP 
includes: $HIPCXXFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; +$HIPCFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; my $compileOnly = 0; my $needCXXFLAGS = 0; # need to add CXX flags to compile step +my $needCFLAGS = 0; # need to add C flags to compile step my $needLDFLAGS = 1; # need to add LDFLAGS to compile step. -my $hasC = 0; # options contain a c-style file (NVCC must force recognition as GPU file) +my $hasC = 0; # options contain a c-style file +my $hasCXX = 0; # options contain a cpp-style file (NVCC must force recognition as GPU file) my $hasCU = 0; # options contain a cu-style file (HCC must force recognition as GPU file) my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. (some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version @@ -384,7 +394,6 @@ foreach $arg (@ARGV) my $swallowArg = 0; if ($arg eq '-c' or $arg eq '--genco') { $compileOnly = 1; - $needCXXFLAGS = 1; $needLDFLAGS = 0; } @@ -447,6 +456,7 @@ foreach $arg (@ARGV) } if($trimarg eq '-use_fast_math') { $HIPCXXFLAGS .= " -DHIP_FAST_MATH "; + $HIPCFLAGS .= " -DHIP_FAST_MATH "; } if(($trimarg eq '-use-staticlib') and ($setLinkType eq 0)) { @@ -596,14 +606,19 @@ foreach $arg (@ARGV) #print "O: <$arg>\n"; } else { # input files and libraries - if (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.c$/) or ($arg =~ /\.cc$/) ) { + if ($arg =~ /\.c$/) { $hasC = 1; + $needCFLAGS = 1; + $toolArgs .= " -x c" + } + elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { + $hasCXX = 1; $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang') { + if ($HIP_PLATFORM eq 'clang' and not $arg =~ /\.c$/) { $toolArgs .= " -x hip" } } - if (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { + elsif (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { $hasCU = 1; $needCXXFLAGS = 1; if ($HIP_PLATFORM eq 'clang') { @@ -648,7 +663,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ my $archMacro = ' 
-D__HIP_ARCH_' . uc($val) . '__=1 '; # Add the arch option and macro to the compiler options. $GPU_ARCH_ARG = $GPU_ARCH_OPT . $val; - $HIPLDFLAGS .= $GPU_ARCH_ARG; + $HIPLDARCHFLAGS .= $GPU_ARCH_ARG; $HIPCXXFLAGS .= $archMacro; if ($HIP_PLATFORM eq 'clang') { $HIPCXXFLAGS .= $GPU_ARCH_ARG; @@ -676,7 +691,7 @@ if ($coFormatv3 and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= " -mcode-object-v3"; } -if ($hasC and $HIP_PLATFORM eq 'nvcc') { +if ($hasCXX and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -x cu"; } if ($hasCU and $HIP_PLATFORM eq 'hcc') { @@ -685,6 +700,7 @@ if ($hasCU and $HIP_PLATFORM eq 'hcc') { if ($buildDeps and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -M -D__CUDACC__"; + $HIPCFLAGS .= " -M -D__CUDACC__"; } if ($buildDeps and $HIP_PLATFORM eq 'clang') { @@ -692,10 +708,14 @@ if ($buildDeps and $HIP_PLATFORM eq 'clang') { } # Add --hip-link only if there are no source files. -if (!$needCXXFLAGS and $HIP_PLATFORM eq 'clang') { +if (!$needCXXFLAGS and !$needCFLAGS and $HIP_PLATFORM eq 'clang') { $HIPLDFLAGS .= " --hip-link"; } +if (!$needCFLAGS and $HIP_PLATFORM eq 'clang') { + $HIPLDFLAGS .= $HIPLDARCHFLAGS; +} + if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= $HCC_WA_FLAGS; @@ -718,6 +738,7 @@ if ($HIP_PLATFORM eq "clang") { # Set default optimization level to -O3 for hip-clang. if ($optArg eq "") { $HIPCXXFLAGS .= " -O3"; + $HIPCFLAGS .= " -O3"; $HIPLDFLAGS .= " -O3"; } # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. 
@@ -738,6 +759,7 @@ if ($HIP_PLATFORM eq "clang") { if ($HIPCC_COMPILE_FLAGS_APPEND) { $HIPCXXFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; + $HIPCFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; } if ($HIPCC_LINK_FLAGS_APPEND) { $HIPLDFLAGS .= " $HIPCC_LINK_FLAGS_APPEND"; @@ -747,6 +769,9 @@ my $CMD="$HIPCC"; if ($needLDFLAGS and not $compileOnly) { $CMD .= " $HIPLDFLAGS"; } +if ($needCFLAGS) { + $CMD .= " $HIPCFLAGS"; +} if ($needCXXFLAGS) { $CMD .= " $HIPCXXFLAGS"; } From b4c69a2e4a2e68ecdc08264c253517cb1b50b908 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Mon, 23 Mar 2020 12:16:24 -0500 Subject: [PATCH 038/132] Update hip_runtime_api.h (#1966) Correct URL for deprecated api list --- include/hip/hcc_detail/hip_runtime_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 2c6726c161..9103b7e3ff 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -55,7 +55,7 @@ THE SOFTWARE. #define DEPRECATED(msg) __attribute__ ((deprecated(msg))) #endif // !defined(_MSC_VER) -#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list" +#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. 
For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md" #if defined(__HCC__) && (__hcc_workweek__ < 16155) #error("This version of HIP requires a newer version of HCC."); From f61b79d9a3e3c15ba08e6cfc03e3c3de11646feb Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Wed, 25 Mar 2020 16:39:24 -0500 Subject: [PATCH 039/132] Fix cooperative launch APIs to set hipGetLastError (#1935) * Fix cooperative launch APIs to set hipGetLastError Previously, the cooperative launch APIs did not properly log their errors in the global hipGetLastError variable before returning back to the user. As such, the APIs would leave hipSuccess in the last error, which would break some use cases. This fixes that problem by making a trampoline function that does the HIP_INIT_API and ihipLogStatus. * Add missing flag to the log of multi-GPU launch --- .../hip/hcc_detail/functional_grid_launch.hpp | 31 +++++++------ src/hip_module.cpp | 46 +++++++++++++++---- 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index 76a04fa355..5abe1095df 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -37,14 +37,15 @@ THE SOFTWARE. 
hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps); -hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps); - -hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags, hip_impl::program_state& ps); - - +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps); +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps); #pragma GCC visibility push(hidden) @@ -202,22 +203,24 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, template inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream) { - +hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMem, + hipStream_t stream) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return ihipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDimX, kernelParams, sharedMemBytes, stream, ps); + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, args, sharedMem, stream, ps); } inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags) { +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return 
ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); } #pragma GCC visibility pop diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 1130dec26a..794bf868e5 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -495,9 +495,8 @@ __global__ void init_gws(uint nwm1) { } } -__attribute__((visibility("default"))) hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, - dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, + dim3 blockDim, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps) { #if (__hcc_workweek__ >= 20093) @@ -515,9 +514,9 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, return hipErrorInvalidConfiguration; } - size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDimX.x; - size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDimX.y; - size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDimX.z; + size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDim.x; + size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDim.y; + size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDim.z; if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; @@ -603,10 +602,10 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, // launch the main kernel in the cooperative queue result = ihipModuleLaunchKernel(tls, kd, - gridDim.x * blockDimX.x, - gridDim.y * blockDimX.y, - gridDim.z * blockDimX.z, - blockDimX.x, blockDimX.y, blockDimX.z, + gridDim.x * blockDim.x, + gridDim.y * blockDim.y, + gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, sharedMemBytes, stream, kernelParams, nullptr, nullptr, nullptr, 0, true, impCoopParams, &coopAV); @@ -631,6 +630,20 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 
gridDim, } __attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernel(const void* func, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernel, func, gridDim, blockDim, args, + sharedMem, stream); + + return ihipLogStatus(ihipLaunchCooperativeKernel(func, gridDim, blockDim, + args, sharedMem, stream, ps)); +} + + hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps) { @@ -875,6 +888,21 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL #endif } +__attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, + numDevices, flags); + + return ihipLogStatus(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, + numDevices, + flags, ps)); +} + namespace hip_impl { hsa_executable_t executable_for(hipModule_t hmod) { return hmod->executable; From 3bef4e60ada012317a051a949039350fdc8e26ef Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 25 Mar 2020 17:41:27 -0400 Subject: [PATCH 040/132] Add option --hipcc-func-supp for function support (#1957) By default hipcc passes -mllvm options to let HIP-Clang inline all device functions. --hipcc-func-supp enables function support and disables inline all. --hipcc-no-func-supp disables function support and enables inline all. This is a temporary solution to match HCC behavior for performance. This option is mainly for debugging purposes. 
Change-Id: I0c44ac1812bb3cea5c3e5b6e14ebaa45919236f6 --- bin/hipcc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/hipcc b/bin/hipcc index 38be6fb5ef..74ca844629 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -337,6 +337,7 @@ my $buildDeps = 0; my $linkType = 1; my $setLinkType = 0; my $coFormatv3 = 0; +my $funcSupp = 0; # enable function support my @options = (); my @inputs = (); @@ -600,6 +601,11 @@ foreach $arg (@ARGV) #if $arg eq "--hipcc_profile") { # Example argument here, hipcc # #} + if ($arg eq "--hipcc-func-supp") { + $funcSupp = 1; + } elsif ($arg eq "--hipcc-no-func-supp") { + $funcSupp = 0; + } } else { push (@options, $arg); } @@ -743,7 +749,7 @@ if ($HIP_PLATFORM eq "clang") { } # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. # ToDo : remove restriction for Windows after clang bug is fixed. - if ($optArg ne "-O0" and not $isWindows) { + if (!$funcSupp and $optArg ne "-O0" and not $isWindows) { $HIPCXXFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; if ($needLDFLAGS and not $needCXXFLAGS) { $HIPLDFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; From 855209abed90fa38cac47bc5c1a7977e67df2958 Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Wed, 25 Mar 2020 16:42:48 -0500 Subject: [PATCH 041/132] Fix path for hip-clang when using hipcc (#1961) * Fix path for hip-clang when using hipcc * Fix typo * Update regex --- hip-config-clang.cmake.in | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/hip-config-clang.cmake.in b/hip-config-clang.cmake.in index 583444673d..67dcb14695 100644 --- a/hip-config-clang.cmake.in +++ b/hip-config-clang.cmake.in @@ -50,14 +50,24 @@ set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") set_and_check(hip_HIPCONFIG_EXECUTABLE 
"${hip_BIN_INSTALL_DIR}/hipconfig") -get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) -get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) +if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) + if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") + set(HIP_CLANG_ROOT ${CMAKE_MATCH_1}) + else() + set(HIP_CLANG_ROOT /opt/rocm/llvm) + endif() +else() + get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) + get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) +endif() file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) find_path(HIP_CLANG_INCLUDE_PATH stddef.h HINTS ${HIP_CLANG_INCLUDE_SEARCH_PATHS} NO_DEFAULT_PATH) - find_dependency(amd_comgr) find_dependency(AMDDeviceLibs) set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") From 37873d12046854dd32addbc2ec86ba056494bb5d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 26 Mar 2020 11:07:43 +0300 Subject: [PATCH 042/132] [HIPIFY][doc] Update README.md: LLVM 10.0.0 Release is supported --- hipify-clang/README.md | 243 +++++++++++++++++++++-------------------- 1 file changed, 122 insertions(+), 121 deletions(-) diff --git a/hipify-clang/README.md b/hipify-clang/README.md index bfa017def2..8cdeceace0 100644 --- a/hipify-clang/README.md +++ b/hipify-clang/README.md @@ -42,10 +42,9 @@ After applying all the matchers, the output HIP source is produced. `hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**9.0.1**](http://releases.llvm.org/download.html#9.0.1), the latest release candidate: [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4). +1. 
[**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**10.0.0**](http://releases.llvm.org/download.html#10.0.0). -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.1 Update 2**](https://developer.nvidia.com/cuda-10.1-download-archive-base). -To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-downloads) please use the latest `LLVM` release candidate: [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.2**](https://developer.nvidia.com/cuda-downloads). | **LLVM release version** | **CUDA latest supported version** | **Windows** | **Linux** | |:----------------------------------------------------------:|:------------------------------------------------------------------------:|:-----------:|:---------:| @@ -66,15 +65,15 @@ To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-download | [8.0.0](http://releases.llvm.org/download.html#8.0.0) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.0_bug_38811.zip)*
| + | | [8.0.1](http://releases.llvm.org/download.html#8.0.1) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.1_bug_38811.zip)*
| + | | [9.0.0](http://releases.llvm.org/download.html#9.0.0) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | -| [**9.0.1**](http://releases.llvm.org/download.html#9.0.1) | [**10.1**](https://developer.nvidia.com/cuda-10.1-download-archive-base) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | -| [10.0.0-rc4](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc4) | [10.2](https://developer.nvidia.com/cuda-downloads) | + | + | +| [9.0.1](http://releases.llvm.org/download.html#9.0.1) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | +| [**10.0.0**](http://releases.llvm.org/download.html#10.0.0)| [**10.2**](https://developer.nvidia.com/cuda-downloads) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | `*` Download the patch and unpack it into your `LLVM` distributive directory; a few header files will be overwritten; rebuilding of `LLVM` is not needed. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=f:\LLVM\9.0.1\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\10.0.0\dist` ### hipify-clang: usage @@ -83,14 +82,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.1 -I /usr/local/cuda-10.1/samples/common/inc +./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.2 -I /usr/local/cuda-10.2/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example: ```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.1 -- -std=c++17 +./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.2 -- -std=c++17 ``` The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. @@ -158,7 +157,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro **LLVM 10.0.0 or newer:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/archive/llvmorg-10.0.0-rc4.tar.gz) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.0/llvm-project-10.0.0.tar.xz) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): **Linux**: @@ -193,19 +192,19 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro * Having multiple CUDA installations to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: - - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1` + - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2` - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"` + - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2"` 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32` + - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32` - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. @@ -213,7 +212,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_CUB_ROOT_DIR=/srv/git/CUB` - - ***Windows***: `-DCUDA_CUB_ROOT_DIR=f:/GIT/cub` + - ***Windows***: `-DCUDA_CUB_ROOT_DIR=d:/GIT/cub` 5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. 
@@ -221,21 +220,21 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /srv/git/LLVM/9.0.1/llvm/utils/lit/setup.py install` + - ***Linux***: `python /srv/git/LLVM/10.0.0/llvm/utils/lit/setup.py install` - - ***Windows***: `python f:/LLVM/9.0.1/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/10.0.0/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/srv/git/LLVM/9.0.1/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/srv/git/LLVM/10.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `f:/LLVM/9.0.1/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/10.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -249,7 +248,7 @@ On Linux the following configurations are tested: Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.5.32 -Ubuntu 16-18: LLVM 8.0.0 - 10.0.0-rc4, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 +Ubuntu 16-18: LLVM 8.0.0 - 10.0.0, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 Minimum build system requirements for the above configurations: @@ -262,11 +261,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/srv/git/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1 \ - 
-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32 \ + -DCMAKE_PREFIX_PATH=/srv/git/LLVM/10.0.0/dist \ + -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2 \ + -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32 \ -DCUDA_CUB_ROOT_DIR=/srv/git/CUB \ - -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit \ .. ``` *A corresponding successful output:* @@ -285,14 +284,14 @@ cmake -- Detecting CXX compiler ABI info - done -- Detecting CXX compile features -- Detecting CXX compile features - done --- Found LLVM 9.0.1: --- - CMake module path: /srv/git/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : /srv/git/LLVM/9.0.1/dist/include --- - Binary path : /srv/git/LLVM/9.0.1/dist/bin +-- Found LLVM 10.0.0: +-- - CMake module path: /srv/git/LLVM/10.0.0/dist/lib/cmake/llvm +-- - Include path : /srv/git/LLVM/10.0.0/dist/include +-- - Binary path : /srv/git/LLVM/10.0.0/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python2.7 (found suitable version "2.7.12", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /srv/git/LLVM/9.0.1/dist/bin/FileCheck +-- Found FileCheck: /srv/git/LLVM/10.0.0/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Looking for pthread_create @@ -302,7 +301,7 @@ cmake -- Looking for pthread_create in pthread -- Looking for pthread_create in pthread - found -- Found Threads: TRUE --- Found CUDA: /usr/local/cuda-10.1 (found version "10.1") +-- Found CUDA: /usr/local/cuda-10.2 (found version "10.2") -- Configuring done -- Generating done -- Build files have been written to: /srv/git/HIP/hipify-clang/build @@ -314,83 +313,85 @@ make test-hipify ```shell Running HIPify regression tests ======================================== -CUDA 10.1 - will be used for testing -LLVM 9.0.1 - will be used for testing +CUDA 10.2 - will be used for testing +LLVM 10.0.0 - will be used for testing x86_64 - Platform architecture Linux 
5.2.0 - Platform OS 64 - hipify-clang binary bitness 64 - python 2.7.12 binary bitness ======================================== --- Testing: 67 tests, 12 threads -- -PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 67) -PASS: hipify :: unit_tests/device/math_functions.cu (2 of 67) -PASS: hipify :: unit_tests/device/atomics.cu (3 of 67) -PASS: hipify :: unit_tests/device/device_symbols.cu (4 of 67) -PASS: hipify :: unit_tests/headers/headers_test_01.cu (5 of 67) -PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 67) -PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 67) -PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 67) -PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 67) -PASS: hipify :: unit_tests/headers/headers_test_06.cu (10 of 67) -PASS: hipify :: unit_tests/headers/headers_test_07.cu (11 of 67) -PASS: hipify :: unit_tests/headers/headers_test_10.cu (12 of 67) -PASS: hipify :: unit_tests/headers/headers_test_11.cu (13 of 67) -PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 67) -PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 67) -PASS: hipify :: unit_tests/headers/headers_test_09.cu (16 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (17 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (24 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (25 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 67) -PASS: 
hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 67) -PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 67) -PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (29 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (30 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (33 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (34 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (40 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (41 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (44 of 67) -PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (45 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (46 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (50 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (51 of 67) -PASS: hipify :: 
unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (52 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (53 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (54 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (55 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (56 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (57 of 67) -PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 67) -PASS: hipify :: unit_tests/samples/allocators.cu (59 of 67) -PASS: hipify :: unit_tests/samples/coalescing.cu (60 of 67) -PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (61 of 67) -PASS: hipify :: unit_tests/samples/axpy.cu (62 of 67) -PASS: hipify :: unit_tests/samples/intro.cu (63 of 67) -PASS: hipify :: unit_tests/samples/cudaRegister.cu (64 of 67) -PASS: hipify :: unit_tests/samples/square.cu (65 of 67) -PASS: hipify :: unit_tests/samples/static_shared_memory.cu (66 of 67) -PASS: hipify :: unit_tests/samples/vec_add.cu (67 of 67) -Testing Time: 3.07s - Expected Passes : 67 +-- Testing: 69 tests, 12 threads -- +PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 69) +PASS: hipify :: unit_tests/device/math_functions.cu (2 of 69) +PASS: hipify :: unit_tests/device/atomics.cu (3 of 69) +PASS: hipify :: unit_tests/headers/headers_test_01.cu (4 of 69) +PASS: hipify :: unit_tests/device/device_symbols.cu (5 of 69) +PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 69) +PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 69) +PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 69) +PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 69) +PASS: hipify :: unit_tests/headers/headers_test_07.cu (10 of 69) +PASS: hipify :: unit_tests/headers/headers_test_06.cu (11 of 69) +PASS: hipify :: unit_tests/headers/headers_test_11.cu (12 of 69) +PASS: hipify :: 
unit_tests/headers/headers_test_10.cu (13 of 69) +PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 69) +PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 69) +PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (16 of 69) +PASS: hipify :: unit_tests/headers/headers_test_09.cu (17 of 69) +PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (24 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (25 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 69) +PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 69) +PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (29 of 69) +PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (30 of 69) +PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (33 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (34 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 69) 
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (40 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (41 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (44 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (45 of 69) +PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (46 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu (50 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu (51 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (52 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (53 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (54 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (55 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (56 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (57 of 69) +PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (59 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (60 of 69) +PASS: hipify :: unit_tests/samples/allocators.cu (61 of 69) +PASS: hipify :: unit_tests/samples/coalescing.cu (62 of 69) +PASS: hipify :: unit_tests/samples/axpy.cu (63 of 69) +PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (64 of 69) +PASS: hipify :: 
unit_tests/samples/cudaRegister.cu (65 of 69) +PASS: hipify :: unit_tests/samples/intro.cu (66 of 69) +PASS: hipify :: unit_tests/samples/square.cu (67 of 69) +PASS: hipify :: unit_tests/samples/static_shared_memory.cu (68 of 69) +PASS: hipify :: unit_tests/samples/vec_add.cu (69 of 69) +Testing Time: 3.23s + Expected Passes : 69 [100%] Built target test-hipify ``` ### hipify-clang: Windows @@ -404,8 +405,8 @@ Testing Time: 3.07s | 7.0.0 - 7.1.0 | 9.2 | 7.6.5.32 | 2017.15.9.11 | 3.13.3 | 3.7.3 | | 8.0.0 - 8.0.1 | 10.0 | 7.6.5.32 | 2017.15.9.15 | 3.14.2 | 3.7.4 | | 9.0.0 - 9.0.1 | 10.1 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.0 | -| 10.0.0-rc1-rc4 | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.0 | 3.16.5 | 3.8.2 | -| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.0 | 3.16.5 | 3.8.2 | +| 10.0.0 | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | +| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | *Building with testing support on `Windows 10` by `Visual Studio 16 2019`:* @@ -416,28 +417,28 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=f:/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1" \ - -DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=f:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py \ + -DCMAKE_PREFIX_PATH=d:/LLVM/10.0.0/dist \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32 \ + -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py \ -Thost=x64 .. 
``` *A corresponding successful output:* ```shell --- Found LLVM 9.0.1: --- - CMake module path: F:/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : F:/LLVM/9.0.1/dist/include --- - Binary path : F:/LLVM/9.0.1/dist/bin --- Found PythonInterp: C:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") --- Found lit: C:/Program Files/Python38/Scripts/lit.exe --- Found FileCheck: F:/LLVM/9.0.1/dist/bin/FileCheck.exe --- Found CUDA: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1 (found version "10.1") +-- Found LLVM 10.0.0: +-- - CMake module path: d:/LLVM/10.0.0/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/10.0.0/dist/include +-- - Binary path : d:/LLVM/10.0.0/dist/bin +-- Found PythonInterp: c:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") +-- Found lit: c:/Program Files/Python38/Scripts/lit.exe +-- Found FileCheck: d:/LLVM/10.0.0/dist/bin/FileCheck.exe +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2 (found version "10.2") -- Configuring done -- Generating done --- Build files have been written to: f:/HIP/hipify-clang/build +-- Build files have been written to: d:/HIP/hipify-clang/build ``` Run `Visual Studio 16 2019`, open the generated `hipify-clang.sln`, build project `test-hipify`. 
From ee5fa8977c2f341dfb1fee930463be9b5027e47e Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary <51944368+cjatin@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:03:43 +0530 Subject: [PATCH 043/132] [dtest] Adding VectorTest (#1732) Adding unit test for operations of device vector types --- tests/src/deviceLib/hip_floatnTM.cpp | 239 +++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 tests/src/deviceLib/hip_floatnTM.cpp diff --git a/tests/src/deviceLib/hip_floatnTM.cpp b/tests/src/deviceLib/hip_floatnTM.cpp new file mode 100644 index 0000000000..921933636f --- /dev/null +++ b/tests/src/deviceLib/hip_floatnTM.cpp @@ -0,0 +1,239 @@ +/* +Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc HIPCC_OPTIONS -std=c++14 + * TEST: %t + * HIT_END + */ + +#include +#include +#include +#include "test_common.h" + +static std::random_device dev; +static std::mt19937 rng(dev()); + +template +__host__ __device__ inline constexpr int count() { + return sizeof(T) / sizeof(M); +} + +inline float getRandomFloat(float min = 10, float max = 100) { + std::uniform_real_distribution gen(min, max); + return gen(rng); +} + +template +void fillMatrix(T* a, int size) { + for (int i = 0; i < size; i++) { + T t; + t.x = getRandomFloat(); + if constexpr (count() >= 2) t.y = getRandomFloat(); + if constexpr (count() >= 3) t.z = getRandomFloat(); + if constexpr (count() >= 4) t.w = getRandomFloat(); + + a[i] = t; + } +} + +// Test operations +template +__host__ __device__ void testOperations(T& a, T& b) { + a.x += b.x; + a.x++; + b.x++; + if constexpr (count() >= 2) { + a.y = b.x; + a.x = b.y; + } + if constexpr (count() >= 3) { + if (a.x > 0) b.x /= a.x; + a.x *= b.z; + a.y--; + } + if constexpr (count() >= 4) { + b.w = a.x; + a.w += (-b.y); + } +} + +template +__global__ void testOperationsGPU(T* d_a, T* d_b, int size) { + int id = threadIdx.x; + if (id >= size) return; // valid indices are [0, size); id == size would index one past the buffers + T &a = d_a[id]; + T &b = d_b[id]; + + testOperations(a, b); +} + + +template +void dcopy(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + a[i] = b[i]; + } +} + +template +bool isEqual(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + if (a[i] != b[i]) { + return false; + } + } + return true; +} + +// Main function that tests type +// T = what you want to test +// D = pack of 1 i.e. 
float1 int1 +template +void testType(int msize) { + T *fa, *fb, *fc, *h_fa, *h_fb; + fa = new T[msize]; + fb = new T[msize]; + fc = new T[msize]; + h_fa = new T[msize]; + h_fb = new T[msize]; + + T *d_fa, *d_fb; + + constexpr int c = count(); + + if (c <= 0 || c >= 5) { + failed("Invalid Size\n"); + } + + fillMatrix(fa, msize); + dcopy(fb, fa, msize); + dcopy(h_fa, fa, msize); + dcopy(h_fb, fa, msize); + for (int i = 0; i < msize; i++) testOperations(h_fa[i], h_fb[i]); + + hipMalloc(&d_fa, sizeof(T) * msize); + hipMalloc(&d_fb, sizeof(T) * msize); + + hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); + hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice); + + auto kernel = testOperationsGPU; + hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); + + hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost); + + bool pass = true; + if (!isEqual(h_fa, fc, msize)) { + pass = false; + } + + delete[] fa; + delete[] fb; + delete[] fc; + delete[] h_fa; + delete[] h_fb; + hipFree(d_fa); + hipFree(d_fb); + + if (!pass) { + failed("Failed"); + } +} + +int main() { + const int msize = 100; + // double + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // floats + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // chars + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // long + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // longlong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // short + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uchars + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ulong + testType(msize); + testType(msize); + 
testType(msize); + testType(msize); + + // ulonglong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ushort + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + passed(); +} From 3e363047d544d03f0fbe07dcfbc9b231c7974819 Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:04:43 +0530 Subject: [PATCH 044/132] Fix for segfault seen if invalid kind is passed to hipMemcpy (#1937) Fixes SWDEV-224941 --- src/hip_memory.cpp | 24 +++++++-- .../memory/hipMemcpyNegetiveTests.cpp | 53 +++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 4a126532f4..3c9b1616af 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -320,20 +320,34 @@ void generic_copy(void* __restrict dst, const void* __restrict src, size_t n, inline void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n, hipMemcpyKind k) { + auto si{info(src)}; + auto di{info(dst)}; + + if (!is_large_BAR){ + // Pointer info takes presidence over hipMemcpyKind + // if there is mismatch b/w Memcpy kind and dst/src pointer + // E.g. 
dst(host pointer),src(device pointer) and hipMemcpyKind set as hipMemcpyHostToDevice + if (di.size == is_cpu_owned && si.size == is_cpu_owned) + k = hipMemcpyHostToHost; + else if (si.size == is_cpu_owned && di.size != is_cpu_owned) + k = hipMemcpyHostToDevice; + else if (di.size == is_cpu_owned && si.size != is_cpu_owned) + k = hipMemcpyDeviceToHost; + else + k = hipMemcpyDeviceToDevice; + } switch (k) { case hipMemcpyHostToHost: std::memcpy(dst, src, n); break; - case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, info(dst)); - case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, info(src)); + case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, di); + case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, si); case hipMemcpyDeviceToDevice: { - const auto di{info(dst)}; - const auto si{info(src)}; throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, nullptr, di.agentBaseAddress), __FILE__, __func__, __LINE__); return do_copy(dst, src, n, di.agentOwner, si.agentOwner); } - default: return generic_copy(dst, src, n, info(dst), info(src)); + default: return generic_copy(dst, src, n, di, si); } } diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp new file mode 100644 index 0000000000..692d14cec7 --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + + +#include "test_common.h" + +int main() { + int* A; + int* Ad; + int* Bd; + + // Allocation + HIPCHECK(hipMalloc((void**)&Ad, sizeof(int))); + HIPCHECK(hipMalloc((void**)&Bd, sizeof(int))); + HIPCHECK(hipHostMalloc((void**)&A,sizeof(int))); + + // Kind should be ignored and test should pass even for incorrect kind + HIPCHECK(hipMemcpy(Ad, A, sizeof(int), hipMemcpyDeviceToHost)); + HIPCHECK(hipMemcpy(A, Ad, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Ad, Bd, sizeof(int), hipMemcpyHostToHost)); + HIPCHECK(hipMemcpy(A, A, sizeof(int), hipMemcpyDeviceToDevice)); + + // nullptr passed as source or destination pointer + HIPASSERT(hipSuccess != hipMemcpy(nullptr, A, sizeof(int), hipMemcpyHostToDevice)); + HIPASSERT(hipSuccess != hipMemcpy(Ad, nullptr, sizeof(int), hipMemcpyHostToDevice)); + + HIPCHECK(hipFree(Ad)); + HIPCHECK(hipFree(Bd)); + HIPCHECK(hipFree(A)); + passed(); +} From 8fefda2bb911a55f485cec42bb9c438fa3c3e402 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Thu, 26 Mar 2020 07:36:09 -0400 Subject: [PATCH 045/132] Initialize all undef symbols with a magic poison (#1962) --- src/program_state.inl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/program_state.inl b/src/program_state.inl index 27fa9f2b0e..bdf127c9c5 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -355,8 +355,11 @@ public: const auto it1 = get_symbol_addresses().find(x); if (it1 == get_symbol_addresses().cend()) { - hip_throw(std::runtime_error{ - "Global symbol: " + x + " is undefined."}); + // For a unknown symbol, initialize it with a magic poison + hsa_executable_agent_global_variable_define( + executable, agent, x.c_str(), + reinterpret_cast(0xDEADBEEFDEADBEEFull)); + continue; } hsa_status_t status; From 3d38135ae29e8b83287c676053315df80800f436 Mon Sep 17 00:00:00 2001 From: Benjamin Sherman Date: Thu, 26 Mar 
2020 07:39:00 -0400 Subject: [PATCH 046/132] Add const qualifiers to HIP_vector_type unary arithmetic operators (#1965) Resolves issue #1960 --- include/hip/hcc_detail/hip_vector_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/hip_vector_types.h b/include/hip/hcc_detail/hip_vector_types.h index 7a91b6a532..19259a3657 100644 --- a/include/hip/hcc_detail/hip_vector_types.h +++ b/include/hip/hcc_detail/hip_vector_types.h @@ -694,7 +694,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator-() noexcept + HIP_vector_type operator-() const noexcept { auto tmp(*this); tmp.data = -tmp.data; @@ -705,7 +705,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator~() noexcept + HIP_vector_type operator~() const noexcept { HIP_vector_type r{*this}; r.data = ~r.data; From 5024f9057a9e8113d934c6cb6525c19bbed1f31c Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:09:44 +0530 Subject: [PATCH 047/132] Fix for __usad issue (#1972) Fixes #1930 --- include/hip/hcc_detail/device_functions.h | 2 +- include/hip/hcc_detail/device_library_decls.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 66a99108da..0a775df275 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -230,7 +230,7 @@ __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { return (x + y + 1) >> 1; } __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { - return __ockl_sad_u32(x, y, z); + return __ockl_sadd_u32(x, y, z); } __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } diff 
--git a/include/hip/hcc_detail/device_library_decls.h b/include/hip/hcc_detail/device_library_decls.h index 182565ad61..2c4e6929ef 100644 --- a/include/hip/hcc_detail/device_library_decls.h +++ b/include/hip/hcc_detail/device_library_decls.h @@ -44,7 +44,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); -extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); +extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint); extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); From bb72abca2029184f52e1075b4c64923d3be39ffa Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 26 Mar 2020 06:39:56 -0500 Subject: [PATCH 048/132] Update hip_debugging.md (#1973) Fix link formatting --- docs/markdown/hip_debugging.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/markdown/hip_debugging.md b/docs/markdown/hip_debugging.md index bf877d894e..fde17d410e 100644 --- a/docs/markdown/hip_debugging.md +++ b/docs/markdown/hip_debugging.md @@ -1,13 +1,13 @@ Table of Contents ================= - * [Profiling HIP Code](#profiling-hip-code" aria-hidden="true"> Date: Fri, 27 Mar 2020 14:08:30 +0530 Subject: [PATCH 051/132] Fix few memory leaks in HIP (#1969) --- samples/0_Intro/module_api/defaultDriver.cpp | 4 ++-- samples/0_Intro/module_api/launchKernelHcc.cpp | 4 ++-- samples/0_Intro/module_api/runKernel.cpp | 4 ++-- samples/0_Intro/module_api_global/runKernel.cpp | 4 ++-- src/hip_module.cpp | 10 ++-------- src/hip_texture.cpp | 3 +++ .../module/hipModuleLoadDataMultThreaded.cpp | 4 ++-- 7 files changed, 15 insertions(+), 18 deletions(-) 
diff --git a/samples/0_Intro/module_api/defaultDriver.cpp b/samples/0_Intro/module_api/defaultDriver.cpp index ea36aabcf4..af8b413ac2 100644 --- a/samples/0_Intro/module_api/defaultDriver.cpp +++ b/samples/0_Intro/module_api/defaultDriver.cpp @@ -80,8 +80,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/launchKernelHcc.cpp b/samples/0_Intro/module_api/launchKernelHcc.cpp index 38cf0d414c..90e569c5bc 100644 --- a/samples/0_Intro/module_api/launchKernelHcc.cpp +++ b/samples/0_Intro/module_api/launchKernelHcc.cpp @@ -107,8 +107,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/runKernel.cpp b/samples/0_Intro/module_api/runKernel.cpp index a011b42666..1093b0dd54 100644 --- a/samples/0_Intro/module_api/runKernel.cpp +++ b/samples/0_Intro/module_api/runKernel.cpp @@ -99,8 +99,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api_global/runKernel.cpp b/samples/0_Intro/module_api_global/runKernel.cpp index 3a2804b7a2..4a2d49144c 100644 --- a/samples/0_Intro/module_api_global/runKernel.cpp +++ b/samples/0_Intro/module_api_global/runKernel.cpp @@ -154,8 +154,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 5334e3ff4d..116c4ff94c 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -330,22 +330,18 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return hipErrorInvalidValue; } - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - if (kds == nullptr) { - return hipErrorNotInitialized; - } + std::vector 
kds(numDevices,0); // prepare all kernel descriptors for each device as all streams will be locked in the next loop for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; if (lp.stream == nullptr) { - free(kds); return hipErrorNotInitialized; } kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), hip_impl::target_agent(lp.stream)); + if (kds[i] == nullptr) { - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( @@ -398,8 +394,6 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList #endif } - free(kds); - return result; } diff --git a/src/hip_texture.cpp b/src/hip_texture.cpp index 27cf321fbc..5d673ddc79 100644 --- a/src/hip_texture.cpp +++ b/src/hip_texture.cpp @@ -312,6 +312,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return ihipLogStatus(hipErrorRuntimeOther); } @@ -449,6 +450,7 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode r HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); @@ -525,6 +527,7 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp 
b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index e73bbedba5..6bbbbbef34 100644 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -107,8 +107,8 @@ void run(const std::vector& buffer) { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); } From 43abf84f54832303dca1cdc0325431b6c1d60861 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Fri, 27 Mar 2020 04:39:07 -0400 Subject: [PATCH 052/132] don't expose symbols from code_object_bundle (#1971) Change-Id: I56479485aad42c3d517fe6d9055be1cd846eeb00 --- CMakeLists.txt | 6 ++-- include/hip/hcc_detail/hiprtc.h | 4 +++ lpl_ca/CMakeLists.txt | 2 +- lpl_ca/ca.hpp | 2 +- src/code_object_bundle.cpp | 34 ------------------- .../code_object_bundle.inl | 30 ++++++++++------ src/hip_module.cpp | 2 +- src/hiprtc.cpp | 2 +- src/program_state.inl | 2 +- 9 files changed, 33 insertions(+), 51 deletions(-) delete mode 100644 src/code_object_bundle.cpp rename include/hip/hcc_detail/code_object_bundle.hpp => src/code_object_bundle.inl (86%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4894168348..0cfa56ac9d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -259,7 +259,6 @@ if(HIP_PLATFORM STREQUAL "hcc") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}") set(SOURCE_FILES_RUNTIME - src/code_object_bundle.cpp src/program_state.cpp src/hip_clang.cpp src/hip_hcc.cpp @@ -305,7 +304,7 @@ if(HIP_PLATFORM STREQUAL "hcc") target_link_libraries(hip_hcc PRIVATE hc_am) target_link_libraries(hip_hcc_static PRIVATE hc_am) - add_library(hiprtc SHARED src/hiprtc.cpp src/code_object_bundle.cpp) + add_library(hiprtc SHARED src/hiprtc.cpp) target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) @@ -315,6 +314,9 @@ 
if(HIP_PLATFORM STREQUAL "hcc") endif() set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden) + set_target_properties(hiprtc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + if(HIP_PLATFORM STREQUAL "hcc") find_package(amd_comgr REQUIRED CONFIG diff --git a/include/hip/hcc_detail/hiprtc.h b/include/hip/hcc_detail/hiprtc.h index 624f1ea157..ec9c85716a 100644 --- a/include/hip/hcc_detail/hiprtc.h +++ b/include/hip/hcc_detail/hiprtc.h @@ -28,6 +28,8 @@ extern "C" { #include +#pragma GCC visibility push (default) + enum hiprtcResult { HIPRTC_SUCCESS = 0, HIPRTC_ERROR_OUT_OF_MEMORY = 1, @@ -79,6 +81,8 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#pragma GCC visibility pop + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/lpl_ca/CMakeLists.txt b/lpl_ca/CMakeLists.txt index ac01a6a0ab..f626b88d89 100644 --- a/lpl_ca/CMakeLists.txt +++ b/lpl_ca/CMakeLists.txt @@ -14,7 +14,7 @@ install(TARGETS lpl RUNTIME DESTINATION bin) #-------------------------------------LPL--------------------------------------# #-------------------------------------CA---------------------------------------# -add_executable(ca ca.cpp ${PROJECT_SOURCE_DIR}/src/code_object_bundle.cpp) +add_executable(ca ca.cpp) set_target_properties( ca PROPERTIES CXX_STANDARD 11 diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp index db63f02498..2d691cd38a 100644 --- a/lpl_ca/ca.hpp +++ b/lpl_ca/ca.hpp @@ -2,7 +2,7 @@ #include "common.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "../src/code_object_bundle.inl" #include "clara/clara.hpp" diff --git a/src/code_object_bundle.cpp b/src/code_object_bundle.cpp deleted file mode 100644 index feef90a61a..0000000000 --- a/src/code_object_bundle.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include 
"../include/hip/hcc_detail/code_object_bundle.hpp" - -#include - -#include -#include -#include -#include -#include - -using namespace std; - -// CREATORS -hip_impl::Bundled_code_header::Bundled_code_header(const vector& x) - : Bundled_code_header{x.cbegin(), x.cend()} {} - -hip_impl::Bundled_code_header::Bundled_code_header( - const void* p) { // This is a pretty terrible interface, useful only because - // hipLoadModuleData is so poorly specified (for no fault of its own). - if (!p) return; - - if (!valid(*static_cast(p))) return; - auto ph = static_cast(p); - - size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); - auto pb = static_cast(p) + sizeof(Header_); - auto n = ph->bundle_cnt_; - while (n--) { - sz += reinterpret_cast(pb)->bundle_sz; - pb += sizeof(Bundled_code::Header); - } - - read(static_cast(p), static_cast(p) + sz, *this); -} diff --git a/include/hip/hcc_detail/code_object_bundle.hpp b/src/code_object_bundle.inl similarity index 86% rename from include/hip/hcc_detail/code_object_bundle.hpp rename to src/code_object_bundle.inl index 77e0d706d6..596ac60661 100644 --- a/include/hip/hcc_detail/code_object_bundle.hpp +++ b/src/code_object_bundle.inl @@ -92,10 +92,6 @@ struct Bundled_code { #define magic_string_ "__CLANG_OFFLOAD_BUNDLE__" -#ifdef __GNUC__ -#pragma GCC visibility push (default) -#endif - class Bundled_code_header { // DATA - STATICS static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1; @@ -167,8 +163,26 @@ class Bundled_code_header { Bundled_code_header() = default; template Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l); - explicit Bundled_code_header(const std::vector& blob); - explicit Bundled_code_header(const void* maybe_blob); + explicit Bundled_code_header(const std::vector& blob) + : Bundled_code_header{blob.cbegin(), blob.cend()} {} + explicit Bundled_code_header(const void* maybe_blob) { + // This is a pretty terrible interface, useful only because + // hipLoadModuleData 
is so poorly specified (for no fault of its own). + if (!maybe_blob) return; + + if (!valid(*static_cast(maybe_blob))) return; + auto ph = static_cast(maybe_blob); + + size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); + auto pb = static_cast(maybe_blob) + sizeof(Header_); + auto n = ph->bundle_cnt_; + while (n--) { + sz += reinterpret_cast(pb)->bundle_sz; + pb += sizeof(Bundled_code::Header); + } + + read(static_cast(maybe_blob), static_cast(maybe_blob) + sz, *this); + } Bundled_code_header(const Bundled_code_header&) = default; Bundled_code_header(Bundled_code_header&&) = default; ~Bundled_code_header() = default; @@ -180,10 +194,6 @@ class Bundled_code_header { size_t bundled_code_size = 0; }; -#ifdef __GNUC__ -#pragma GCC visibility pop -#endif - // CREATORS template Bundled_code_header::Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 116c4ff94c..2d4fde7c26 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -50,7 +50,7 @@ THE SOFTWARE. #include #include #include -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "hip_fatbin.h" // TODO Use Pool APIs from HCC to get memory regions. diff --git a/src/hiprtc.cpp b/src/hiprtc.cpp index 3c7fe6e78c..4efdbad653 100644 --- a/src/hiprtc.cpp +++ b/src/hiprtc.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ #include "../include/hip/hiprtc.h" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/elfio/elfio.hpp" #include "../include/hip/hcc_detail/program_state.hpp" diff --git a/src/program_state.inl b/src/program_state.inl index bdf127c9c5..c62b8f4061 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -1,6 +1,6 @@ #include "../include/hip/hcc_detail/program_state.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/hsa_helpers.hpp" #if !defined(__cpp_exceptions) From 351d39e6aa66ca1978b05006cbe2d7fbf36b8845 Mon Sep 17 00:00:00 2001 From: satyanveshd <53337087+satyanveshd@users.noreply.github.com> Date: Fri, 27 Mar 2020 14:10:12 +0530 Subject: [PATCH 053/132] [dtests] Added few Negative tests (#1735) --- src/hip_memory.cpp | 4 -- .../Negative/memory/hipMemcpyFromSymbol.cpp | 46 +++++++++++++++++ .../memory/hipMemcpyFromSymbolAsync.cpp | 49 +++++++++++++++++++ .../src/Negative/memory/hipMemcpyToSymbol.cpp | 46 +++++++++++++++++ .../memory/hipMemcpyToSymbolAsync.cpp | 49 +++++++++++++++++++ tests/src/Negative/memory/hipMemory.cpp | 43 ++++++++++++++++ .../stream/hipStreamCreateWithFlags.cpp | 40 +++++++++++++++ 7 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbol.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyToSymbol.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp create mode 100644 tests/src/Negative/memory/hipMemory.cpp create mode 100644 tests/src/Negative/stream/hipStreamCreateWithFlags.cpp diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 3c9b1616af..e166a84aa0 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -2271,8 +2271,6 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { auto device = 
ctx->getWriteableDevice(); if (total) { *total = device->_props.totalGlobalMem; - } else { - e = hipErrorInvalidValue; } if (free) { @@ -2295,8 +2293,6 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { } else { return ihipLogStatus(hipErrorInvalidValue); } - } else { - e = hipErrorInvalidValue; } } else { diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp new file mode 100644 index 0000000000..10f8c51a6d --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyFromSymbol(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbol(S, NULL, SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp new file mode 100644 index 0000000000..fa341c6cea --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyFromSymbolAsync(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbolAsync(S, NULL, SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbol.cpp b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp new file mode 100644 index 0000000000..8626c2c34f --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyToSymbol(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbol(NULL, S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp new file mode 100644 index 0000000000..832e4336be --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 100 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyToSymbolAsync(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbolAsync(NULL, S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemory.cpp b/tests/src/Negative/memory/hipMemory.cpp new file mode 100644 index 0000000000..b062d05cc1 --- /dev/null +++ b/tests/src/Negative/memory/hipMemory.cpp @@ -0,0 +1,43 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 100 + +int main(){ + hipError_t e; + char str[SIZE]="Hi, I am Ellesemere. What is ur name?"; + + e = hipMemcpy(0, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipMemcpy(NULL, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipMemset(0,99,80); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); +} diff --git a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp new file mode 100644 index 0000000000..8a1dc07b62 --- /dev/null +++ b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -0,0 +1,40 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" + +int main(){ + + hipError_t e; + hipStream_t stream; + + e = hipStreamCreateWithFlags(&stream, -1); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipStreamCreateWithFlags(&stream, 2); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); +} From cbc3d1713fbb036c3e1f4534e680103c08f30f81 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Sat, 28 Mar 2020 17:28:33 +0530 Subject: [PATCH 054/132] Remove address_space(1) typecast and use __ockl_atomic_add_noret_f32 (#1956) * Remove address_space(1) typecast for ockl_global_atomic_add_f32 * use __ockl_atomic_add_noret_f32 --- include/hip/hcc_detail/device_library_decls.h | 2 +- include/hip/hcc_detail/hip_atomic.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/device_library_decls.h b/include/hip/hcc_detail/device_library_decls.h index 2c4e6929ef..2eadb86774 100644 --- a/include/hip/hcc_detail/device_library_decls.h +++ b/include/hip/hcc_detail/device_library_decls.h @@ -72,7 +72,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank( extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); -extern "C" __device__ void __ockl_global_atomic_add_f32(__attribute__((address_space(1))) float*, float); +extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float); // Introduce local address space #define __local __attribute__((address_space(3))) diff --git a/include/hip/hcc_detail/hip_atomic.h b/include/hip/hcc_detail/hip_atomic.h index 7ccfa6b43e..d00ebcdabb 100644 --- a/include/hip/hcc_detail/hip_atomic.h +++ b/include/hip/hcc_detail/hip_atomic.h @@ -78,7 +78,7 @@ __device__ inline void atomicAddNoRet(float* address, float val) { - __ockl_global_atomic_add_f32((__attribute__((address_space(1))) float*)address, val); + 
__ockl_atomic_add_noret_f32(address, val); } __device__ From eba596c87ae48cb33608afa0e81017d3d1ff9539 Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Sat, 28 Mar 2020 17:29:49 +0530 Subject: [PATCH 055/132] Fix for segfault seen in hipMemcpyDtoD (#1964) * Fixes SWDEV-227444. --- src/hip_memory.cpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index e166a84aa0..e7bc348951 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -309,12 +309,14 @@ void generic_copy(void* __restrict dst, const void* __restrict src, size_t n, if (di.size == is_cpu_owned) return d2h_copy(dst, src, n, si); if (si.size == is_cpu_owned) return h2d_copy(dst, src, n, di); - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + // If devices do not have access then fallback mechanism will be used + // copy will be slower + throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); } inline @@ -341,11 +343,16 @@ void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n, case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, di); case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, si); case hipMemcpyDeviceToDevice: { - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return 
do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + + // If devices do not have access then fallback mechanism will be used + // copy will be slower + throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); + break; } default: return generic_copy(dst, src, n, di, si); } From 50ef250a3b80fc58d1e1fe1688eb053782a6adc6 Mon Sep 17 00:00:00 2001 From: ansurya <50609411+ansurya@users.noreply.github.com> Date: Wed, 1 Apr 2020 12:10:17 +0530 Subject: [PATCH 056/132] tex1Dfetch behaviour for different address mode and filter mode (#1772) Fixes github issue: #1754 - When ResourceDesc::resType is hipResourceTypeLinear ignore address mode and filter mode. - When textureDesc::normalizedCoords is set to zero, AddressModeWrap and AddressModeMirror won't be supported and will be switched to AddressModeClamp. --- src/hip_texture.cpp | 23 +++- tests/src/texture/hipTex1DFetchCheckModes.cpp | 122 ++++++++++++++++++ 2 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 tests/src/texture/hipTex1DFetchCheckModes.cpp diff --git a/src/hip_texture.cpp b/src/hip_texture.cpp index 5d673ddc79..29f0465dc1 100644 --- a/src/hip_texture.cpp +++ b/src/hip_texture.cpp @@ -301,7 +301,12 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); - + if(hipResourceTypeLinear == pResDesc->resType) { + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + } else if(!pTexDesc->normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if(hipResourceTypePitch2D != pResDesc->resType) @@ -439,7 +444,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum 
hipTextureReadMode r imageDescriptor.format.channel_type = channelType; hsa_ext_sampler_descriptor_t samplerDescriptor; - fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + if (normalizedCoords) { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED; + } else { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; @@ -516,7 +527,9 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if( 0 == pitch) @@ -623,7 +636,9 @@ hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureRea hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT); diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp new file mode 100644 index 0000000000..9b7a36c6be --- /dev/null +++ b/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -0,0 +1,122 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/*HIT_START + * BUILD: %t %s ../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "../test_common.h" + +#define N 16 +#define offset 3 +__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { + int k = blockIdx.x * blockDim.x + threadIdx.x; + if (k < N) + val[k] = tex1Dfetch(obj, k+offset); +} + +int runTest(hipTextureAddressMode, hipTextureFilterMode); + +int main(int argc, char **argv) { + int testResult = runTest(hipAddressModeClamp,hipFilterModePoint); + testResult &= runTest(hipAddressModeClamp,hipFilterModeLinear); /* accumulate: every mode combination must pass, not only the last one */ + testResult &= runTest(hipAddressModeWrap,hipFilterModePoint); + testResult &= runTest(hipAddressModeWrap,hipFilterModeLinear); + if(testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +int runTest(hipTextureAddressMode addressMode, hipTextureFilterMode filterMode) { + + int testResult = 1; + + hipCtx_t HipContext; + hipDevice_t HipDevice; + int deviceID = 0; + hipDeviceGet(&HipDevice, deviceID); + hipCtxCreate(&HipContext, 0, HipDevice); + + // Allocating the required buffer on gpu device + float *texBuf, *texBufOut; + float val[N], output[N]; + + for (int i = 0; i < N; i++) { + val[i] = i+1; + output[i] = 0.0; + } + + HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); + HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); + HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); + HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); + hipResourceDesc resDescLinear; + + memset(&resDescLinear, 0, sizeof(resDescLinear)); + resDescLinear.resType = hipResourceTypeLinear; + resDescLinear.res.linear.devPtr = texBuf; + resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + resDescLinear.res.linear.sizeInBytes = N * sizeof(float); + + hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = hipReadModeElementType; + + texDesc.addressMode[0] = addressMode; + texDesc.addressMode[1] = addressMode; + 
texDesc.filterMode = filterMode; + texDesc.normalizedCoords = false; + + // Creating texture object + hipTextureObject_t texObj = 0; + HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); + + dim3 dimBlock(1, 1, 1); + dim3 dimGrid(N , 1, 1); + + hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, + texBufOut, texObj); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); + + for (int i = offset; i < N; i++) { + if (output[i-offset] != val[i]) { + testResult = 0; + break; + } + } + if(testResult){ + for(int i = N-offset; i < N; i++){ + if (output[i] != 0){ + testResult = 0; + break; + } + } + } + HIPCHECK(hipDestroyTextureObject(texObj)); + HIPCHECK(hipFree(texBuf)); + HIPCHECK(hipFree(texBufOut)); + return testResult; +} From 8c4e8d6484ca146fbd187bc7ea4bec38e427a3f5 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Wed, 1 Apr 2020 01:40:33 -0500 Subject: [PATCH 057/132] adding hipApiString (#1989) --- hip_prof_gen.py | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/hip_prof_gen.py b/hip_prof_gen.py index d2da7cd4df..567fba0d4a 100755 --- a/hip_prof_gen.py +++ b/hip_prof_gen.py @@ -388,30 +388,29 @@ def generate_prof_header(f, api_map, opts_map): f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') # Generating the method for the API string, name and parameters - if False: - f.write('\n') - f.write('#if 0\n') - f.write('#include \n'); - f.write('#include \n'); - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in 
range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') + f.write('\n') + f.write('#if ENABLE_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' 
+ arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // ENABLE_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); From e4a1c49e7243ede15ee44a09fd7df7133812e6ec Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 4 Apr 2020 15:10:28 -0500 Subject: [PATCH 058/132] adding HIP_PROF version macro (#2000) --- hip_prof_gen.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hip_prof_gen.py b/hip_prof_gen.py index 567fba0d4a..38b7aaeb6e 100755 --- a/hip_prof_gen.py +++ b/hip_prof_gen.py @@ -306,6 +306,7 @@ def generate_prof_header(f, api_map, opts_map): f.write('// automatically generated sources\n') f.write('#ifndef _HIP_PROF_STR_H\n'); f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') # Generating dummy macro for non-public API f.write('\n// Dummy API primitives\n') @@ -389,7 +390,7 @@ def generate_prof_header(f, api_map, opts_map): # Generating the method for the API string, name and parameters f.write('\n') - f.write('#if ENABLE_HIP_API_STRING\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') f.write('#include \n'); f.write('#include \n'); f.write('// HIP API string method, method name and parameters\n') @@ -410,7 +411,7 @@ def generate_prof_header(f, api_map, opts_map): f.write(' };\n') f.write(' return strdup(oss.str().c_str());\n') f.write('};\n') - f.write('#endif // ENABLE_HIP_API_STRING\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); From 4af2106d10ae5ed33fb0f8ab7d3716d0b61273bd Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Mon, 6 Apr 2020 06:05:18 -0400 Subject: [PATCH 059/132] Fix ambiguity of fma for _Float16 for libc++ (#1976) libc++ defines fma as template function for auto promotion of mixed-type arguments. 
libc++ does not handle _Float16 as _Float16 is not a supported type by C++ standard. As such, it is unlikely we can commit our fix for _Float16 to libc++ trunk. Therefore we handle _Float16 with a template specialization of __numeric_type in HIP headers. Change-Id: If01960a657ebf1a7a67463cdcf66fab7458dff3c --- include/hip/hcc_detail/math_functions.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index 19def9ec7e..e146b1ae4f 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -47,6 +47,19 @@ THE SOFTWARE. #include "kalmar_math.h" #endif +#if _LIBCPP_VERSION && __HIP__ +namespace std { +template <> +struct __numeric_type<_Float16> +{ + static _Float16 __test(_Float16); + + typedef _Float16 type; + static const bool value = true; +}; +} +#endif // _LIBCPP_VERSION + #pragma push_macro("__DEVICE__") #pragma push_macro("__RETURN_TYPE") From 770e76e7525d35fe4d47fdf3f230937fa7ca8367 Mon Sep 17 00:00:00 2001 From: ansurya <50609411+ansurya@users.noreply.github.com> Date: Mon, 6 Apr 2020 15:35:43 +0530 Subject: [PATCH 060/132] Initial support for bfloat16 (#1980) --- include/hip/hip_bfloat16.h | 280 ++++++++++++++++++++++++++++ tests/src/deviceLib/hipBfloat16.cpp | 137 ++++++++++++++ 2 files changed, 417 insertions(+) create mode 100644 include/hip/hip_bfloat16.h create mode 100644 tests/src/deviceLib/hipBfloat16.cpp diff --git a/include/hip/hip_bfloat16.h b/include/hip/hip_bfloat16.h new file mode 100644 index 0000000000..ef09cf00d0 --- /dev/null +++ b/include/hip/hip_bfloat16.h @@ -0,0 +1,280 @@ +/** + * MIT License + * + * Copyright 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/*!\file + * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef + */ + +#ifndef _HIP_BFLOAT16_H_ +#define _HIP_BFLOAT16_H_ + +#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only +// include a minimal definition of hip_bfloat16 + +#include +/*! \brief Struct to represent a 16 bit brain floating point number. 
*/ +typedef struct +{ + uint16_t data; +} hip_bfloat16; + +#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#include +#include +#include +#include +#include +#include + +struct hip_bfloat16 +{ + uint16_t data; + + enum truncate_t + { + truncate + }; + + __host__ __device__ hip_bfloat16() = default; + + // round upper 16 bits of IEEE float to convert to bfloat16 + explicit __host__ __device__ hip_bfloat16(float f) + : data(float_to_bfloat16(f)) + { + } + + explicit __host__ __device__ hip_bfloat16(float f, truncate_t) + : data(truncate_float_to_bfloat16(f)) + { + } + + // zero extend lower 16 bits of bfloat16 to convert to IEEE float + __host__ __device__ operator float() const + { + union + { + uint32_t int32; + float fp32; + } u = {uint32_t(data) << 16}; + return u.fp32; + } + + static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f) + { + hip_bfloat16 output; + output.data = float_to_bfloat16(f); + return output; + } + + static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t) + { + hip_bfloat16 output; + output.data = truncate_float_to_bfloat16(f); + return output; + } + +private: + static __host__ __device__ uint16_t float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + if(~u.int32 & 0x7f800000) + { + // When the exponent bits are not all 1s, then the value is zero, normal, + // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus + // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd). + // This causes the bfloat16's mantissa to be incremented by 1 if the 16 + // least significant bits of the float mantissa are greater than 0x8000, + // or if they are equal to 0x8000 and the least significant bit of the + // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when + // the lower 16 bits are exactly 0x8000. 
If the bfloat16 mantissa already + // has the value 0x7f, then incrementing it causes it to become 0x00 and + // the exponent is incremented by one, which is the next higher FP value + // to the unrounded bfloat16 value. When the bfloat16 value is subnormal + // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up + // to a normal value with an exponent of 0x01 and a mantissa of 0x00. + // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, + // incrementing it causes it to become an exponent of 0xFF and a mantissa + // of 0x00, which is Inf, the next higher value to the unrounded value. + u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even + } + else if(u.int32 & 0xffff) + { + // When all of the exponent bits are 1, the value is Inf or NaN. + // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero + // mantissa bit. Quiet NaN is indicated by the most significant mantissa + // bit being 1. Signaling NaN is indicated by the most significant + // mantissa bit being 0 but some other bit(s) being 1. If any of the + // lower 16 bits of the mantissa are 1, we set the least significant bit + // of the bfloat16 mantissa, in order to preserve signaling NaN in case + // the bfloat16's mantissa bits are all 0. 
+ u.int32 |= 0x10000; // Preserve signaling NaN + } + return uint16_t(u.int32 >> 16); + } + + // Truncate instead of rounding, preserving SNaN + static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff)); + } +}; + +typedef struct +{ + uint16_t data; +} hip_bfloat16_public; + +static_assert(std::is_standard_layout{}, + "hip_bfloat16 is not a standard layout type, and thus is " + "incompatible with C."); + +static_assert(std::is_trivial{}, + "hip_bfloat16 is not a trivial type, and thus is " + "incompatible with C."); + +static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public) + && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data), + "internal hip_bfloat16 does not match public hip_bfloat16"); + +inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16) +{ + return os << float(bf16); +} +inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a) +{ + return a; +} +inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a) +{ + a.data ^= 0x8000; + return a; +} +inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) + float(b)); +} +inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) - float(b)); +} +inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) * float(b)); +} +inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) / float(b)); +} +inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) < float(b); +} +inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) == float(b); +} +inline __host__ __device__ bool 
operator>(hip_bfloat16 a, hip_bfloat16 b) +{ + return b < a; +} +inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) <= float(b); /* compare as float so a NaN operand yields false, matching operator< and operator== */ +} +inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b) +{ + return !(a == b); +} +inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) >= float(b); /* compare as float so a NaN operand yields false, matching operator< and operator== */ +} +inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a + b; +} +inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a - b; +} +inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a * b; +} +inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a / b; +} +inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a) +{ + return a += hip_bfloat16(1.0f); +} +inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a) +{ + return a -= hip_bfloat16(1.0f); +} +inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + ++a; + return orig; +} +inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + --a; + return orig; +} + +namespace std +{ + constexpr __host__ __device__ bool isinf(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && !(a.data & 0x7f); + } + constexpr __host__ __device__ bool isnan(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && +(a.data & 0x7f); + } + constexpr __host__ __device__ bool iszero(hip_bfloat16 a) + { + return !(a.data & 0x7fff); + } +} + +#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#endif // _HIP_BFLOAT16_H_ diff --git a/tests/src/deviceLib/hipBfloat16.cpp b/tests/src/deviceLib/hipBfloat16.cpp new file mode 100644 index 0000000000..306d995ffe --- /dev/null +++ b/tests/src/deviceLib/hipBfloat16.cpp @@ -0,0 +1,137 @@ +/* +Copyright (c) 2015-2019 
Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ +#include "test_common.h" +#include +#include +#include +#include +#include + +#define SIZE 100 +using namespace std; + +static random_device dev; +static mt19937 rng(dev()); + +inline float getRandomFloat(long min = 10, long max = LONG_MAX) { + uniform_real_distribution gen(min, max); + return gen(rng); +} + +__host__ __device__ bool testRelativeAccuracy(float a, hip_bfloat16 b) { + float c = float(b); + // float relative error should be less than 1/(2^7) since bfloat16 + // has 7 bits mantissa. 
+ if(fabs(c - a) / a <= 1.0 / 128){ + return true; + } + return false; +} + +__host__ __device__ void testOperations(float &fa, float &fb) { + + hip_bfloat16 bf_a(fa); + hip_bfloat16 bf_b(fb); + float fc = float(bf_a); + float fd = float(bf_b); + + assert(testRelativeAccuracy(fa, bf_a)); + assert(testRelativeAccuracy(fb, bf_b)); + + assert(testRelativeAccuracy(fc + fd, bf_a + bf_b)); + //when checked as above for add, operation sub fails on GPU + assert(hip_bfloat16(fc - fd) == (bf_a - bf_b)); + assert(testRelativeAccuracy(fc * fd, bf_a * bf_b)); + assert(testRelativeAccuracy(fc / fd, bf_a / bf_b)); + + hip_bfloat16 bf_opNegate = -bf_a; + assert(bf_opNegate == -bf_a); + + hip_bfloat16 bf_x; + bf_x = bf_a; + bf_x++; + bf_x--; + ++bf_x; + --bf_x; + //hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision + assert(testRelativeAccuracy(bf_x,bf_a)); + + bf_x = bf_a; + bf_x += bf_b; + assert(bf_x == (bf_a + bf_b)); + bf_x = bf_a; + bf_x -= bf_b; + assert(bf_x == (bf_a - bf_b)); + bf_x = bf_a; + bf_x *= bf_b; + assert(bf_x == (bf_a * bf_b)); + bf_x = bf_a; + bf_x /= bf_b; + assert(bf_x == (bf_a / bf_b)); + + hip_bfloat16 bf_rounded = hip_bfloat16::round_to_bfloat16(fa); + if (isnan(bf_rounded)) { + assert(isnan(bf_rounded) || isinf(bf_rounded)); + } +} + +__global__ void testOperationsGPU(float* d_a, float* d_b) +{ + int id = threadIdx.x; + if (id >= SIZE) return; /* valid indices are 0..SIZE-1; id == SIZE would index one past the SIZE-element buffers */ + float &a = d_a[id]; + float &b = d_b[id]; + testOperations(a, b); +} + +int main(){ + float *h_fa, *h_fb; + float *d_fa, *d_fb; + + h_fa = new float[SIZE]; + h_fb = new float[SIZE]; + for (int i = 0; i < SIZE; i++) { + h_fa[i] = getRandomFloat(); + h_fb[i] = getRandomFloat(); + testOperations(h_fa[i], h_fb[i]); + } + cout<<"Host bfloat16 Operations Successful!!"< Date: Mon, 6 Apr 2020 03:05:59 -0700 Subject: [PATCH 061/132] Fix 2D and 3D memset (#1987) --- src/hip_memory.cpp | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff 
--git a/src/hip_memory.cpp b/src/hip_memory.cpp index e7bc348951..0aee149c9f 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -2219,16 +2219,40 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, ihipMemsetDataTypeChar)); } +hipError_t ihipMemsetND(void* dst, size_t pitch, int value, size_t width, size_t height, size_t setHeight,size_t depth, + hipStream_t stream, enum ihipMemsetDataType copyDataType, bool async) { + size_t sizeBytes =0; + hipError_t hipStatus = hipSuccess; + if ((pitch == width) && (height == setHeight)) { + sizeBytes = pitch * setHeight * depth; + if(async) + return ihipMemsetAsync(dst, value, sizeBytes, stream, copyDataType); + else + return ihipMemsetSync(dst, value, sizeBytes, nullptr, copyDataType); + } else { + for(size_t i = 0; i < depth; ++i) { + for(size_t j = 0; j < setHeight; ++j) { + void* dstPtr = ((unsigned char*) dst + i * height * pitch + j * pitch); + if(async) + hipStatus = ihipMemsetAsync(dstPtr, value, width, stream, copyDataType); + else + hipStatus = ihipMemsetSync(dstPtr, value, width, nullptr, copyDataType); + if (hipStatus != hipSuccess) + return hipStatus; + } + } + } + return hipStatus; +} + hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_SPECIAL_API(hipMemset2D, (TRACE_MCMD), dst, pitch, value, width, height); - size_t sizeBytes = pitch * height; - return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(dst, pitch, value, width, height, height, 1, hipStreamNull, ihipMemsetDataTypeChar, false)); } hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream ) { HIP_INIT_SPECIAL_API(hipMemset2DAsync, (TRACE_MCMD), dst, pitch, value, width, height, stream); - size_t sizeBytes = pitch * height; - return ihipLogStatus(ihipMemsetAsync(dst, value, sizeBytes, 
stream, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(dst, pitch, value, width, height, height, 1, stream, ihipMemsetDataTypeChar, true)); } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { @@ -2258,14 +2282,12 @@ hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { HIP_INIT_SPECIAL_API(hipMemset3D, (TRACE_MCMD), &pitchedDevPtr, value, &extent); - size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - return ihipLogStatus(ihipMemsetSync(pitchedDevPtr.ptr, value, sizeBytes, nullptr, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(pitchedDevPtr.ptr, pitchedDevPtr.pitch ,value, extent.width, pitchedDevPtr.ysize, extent.height, extent.depth, hipStreamNull, ihipMemsetDataTypeChar, false)); } hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream ) { HIP_INIT_SPECIAL_API(hipMemset3DAsync, (TRACE_MCMD), &pitchedDevPtr, value, &extent); - size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - return ihipLogStatus(ihipMemsetAsync(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(pitchedDevPtr.ptr,pitchedDevPtr.pitch, value, extent.width, pitchedDevPtr.ysize, extent.height, extent.depth, stream, ihipMemsetDataTypeChar, true)); } hipError_t hipMemGetInfo(size_t* free, size_t* total) { From 6358e40a762b7c0e36bfbd6ef0fe01b757115dbc Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary <51944368+cjatin@users.noreply.github.com> Date: Mon, 6 Apr 2020 15:37:07 +0530 Subject: [PATCH 062/132] Removing header size from formula (#1988) Fixed a bug in the elf file size computation. 
--- src/hip_module.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 2d4fde7c26..192aba5da6 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -1179,8 +1179,7 @@ string read_elf_file_as_string(const void* file) { auto h = static_cast(file); auto s = static_cast(file); // This assumes the common case of SHT being the last part of the ELF. - auto sz = - sizeof(ELFIO::Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum; + auto sz = h->e_shoff + h->e_shentsize * h->e_shnum; return string{s, s + sz}; } From 9de5e90ab5e30ed1029d226f91c42f635f5c7a69 Mon Sep 17 00:00:00 2001 From: lmoriche Date: Mon, 6 Apr 2020 03:07:35 -0700 Subject: [PATCH 063/132] Don't duplicate embedded code objects (#1991) If the code object is embedded in an already mapped file, and the lifetime of the mapped file exceeds the lifetime of the executable, we do not need to make a copy of the binary. This allows the ROCR to present the code object URI as file:///path/to/file#offset=X&size=Y. 
--- include/hip/hcc_detail/program_state.hpp | 3 +++ src/hip_clang.cpp | 6 +++--- src/program_state.cpp | 9 ++++++++- src/program_state.inl | 21 ++++++++++++++++----- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index fca88f8094..6128a4c158 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -73,6 +73,9 @@ public: hsa_executable_t load_executable(const char*, const size_t, hsa_executable_t, hsa_agent_t); + hsa_executable_t load_executable_no_copy(const char*, const size_t, + hsa_executable_t, + hsa_agent_t); void* global_addr_by_name(const char* name); diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 4c8ae07134..93f5a82a2f 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -89,9 +89,9 @@ __hipRegisterFatBinary(const void* data) reinterpret_cast(header) + desc->offset), desc->size}; if (HIP_DUMP_CODE_OBJECT) __hipDumpCodeObject(image); - module->executable = hip_impl::get_program_state().load_executable(image.data(), image.size(), - module->executable, - agent); + module->executable = hip_impl::get_program_state().load_executable_no_copy( + reinterpret_cast(header) + desc->offset, desc->size, + module->executable, agent); if (module->executable.handle) { modules->at(deviceId) = module; diff --git a/src/program_state.cpp b/src/program_state.cpp index 5e9f9976be..975dcda321 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -68,7 +68,14 @@ namespace hip_impl { const size_t data_size, hsa_executable_t executable, hsa_agent_t agent) { - return impl->load_executable(data, data_size, executable, agent); + return impl->load_executable(data, data_size, true, executable, agent); + } + + hsa_executable_t program_state::load_executable_no_copy(const char* data, + const size_t data_size, + hsa_executable_t executable, + hsa_agent_t agent) { + return impl->load_executable(data, data_size, false, 
executable, agent); } hipFunction_t program_state::kernel_descriptor(std::uintptr_t function_address, diff --git a/src/program_state.inl b/src/program_state.inl index c62b8f4061..760dafea22 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -406,11 +406,13 @@ public: } void load_code_object_and_freeze_executable( - const std::string& file, hsa_agent_t agent, hsa_executable_t executable) { + const char* data, + const size_t data_size, bool make_copy, + hsa_agent_t agent, hsa_executable_t executable) { // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. - if (file.empty()) return; + if (!data_size) return; static const auto cor_deleter = [] (hsa_code_object_reader_t* p) { if (!p) return; @@ -423,8 +425,16 @@ public: decltype(code_readers.second)::iterator it; { std::lock_guard lck{code_readers.first}; + + std::string file; + if (make_copy) + file = std::string(data, data_size); + code_readers.second.emplace_back(move(file), move(tmp)); it = std::prev(code_readers.second.end()); + + if (make_copy) + data = it->first.data(); } auto check_hsa_error = [](hsa_status_t s) { @@ -438,7 +448,7 @@ public: }; check_hsa_error(hsa_code_object_reader_create_from_memory( - it->first.data(), it->first.size(), it->second.get())); + data, data_size, it->second.get())); check_hsa_error(hsa_executable_load_agent_code_object( executable, agent, *it->second, nullptr, nullptr)); @@ -485,7 +495,7 @@ public: // TODO: this is massively inefficient and only meant for // illustration. 
- tmp = impl.load_executable(blob.data(), blob.size(), tmp, a); + tmp = impl.load_executable(blob.data(), blob.size(), true, tmp, a); if (tmp.handle) current_exes.push_back(tmp); } @@ -503,6 +513,7 @@ public: hsa_executable_t load_executable(const char* data, const size_t data_size, + bool make_copy, hsa_executable_t executable, hsa_agent_t agent) { ELFIO::elfio reader; @@ -519,7 +530,7 @@ public: code_object_dynsym, agent, executable); - load_code_object_and_freeze_executable(move(ts), agent, executable); + load_code_object_and_freeze_executable(data, data_size, make_copy, agent, executable); return executable; } From b80a2c3966e18f5f6b7552ef2eccce9d67568861 Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Mon, 6 Apr 2020 15:38:25 +0530 Subject: [PATCH 064/132] hipEventElapsedTime should respect device (#1992) Fixes SWDEV-228636. Also added a unit test to verify this. --- src/hip_event.cpp | 11 +- src/hip_hcc_internal.h | 1 + .../runtimeApi/event/hipEventElapsedTime.cpp | 103 ++++++++++++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 tests/src/runtimeApi/event/hipEventElapsedTime.cpp diff --git a/src/hip_event.cpp b/src/hip_event.cpp index 733f0d9db3..b297fabbd9 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -30,7 +30,12 @@ THE SOFTWARE. //--- -ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) { _flags = flags; }; +ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) { + _flags = flags; + GET_TLS(); + auto ctx = ihipGetTlsDefaultCtx(); + _deviceId = ctx == nullptr ? 
-1 : ctx->getDevice()->_deviceId; +}; // Attach to an existing completion future: @@ -175,7 +180,9 @@ hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { HIP_INIT_API(hipEventElapsedTime, ms, start, stop); if (ms == nullptr) return ihipLogStatus(hipErrorInvalidValue); - if ((start == nullptr) || (stop == nullptr)) return ihipLogStatus(hipErrorInvalidHandle); + if ((start == nullptr) || (stop == nullptr) || + (start->_deviceId != stop->_deviceId)) + return ihipLogStatus(hipErrorInvalidHandle); *ms = 0.0f; auto startEcd = start->locked_copyCrit(); diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index c7ff27c7b5..93551c8316 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -732,6 +732,7 @@ class ihipEvent_t { public: unsigned _flags; + int _deviceId; private: ihipEventCritical_t _criticalData; diff --git a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp new file mode 100644 index 0000000000..d9e3bf9c5a --- /dev/null +++ b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp @@ -0,0 +1,103 @@ +/* +Copyright (c) 2020-Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#include "test_common.h" + +void NegativeTests(){ + + // Null pointers + { + hipEvent_t start,end; + float tms = 1.0f; + HIPASSERT(hipEventElapsedTime(nullptr,start,end) == hipErrorInvalidValue); +#ifndef __HIP_PLATFORM_NVCC__ + // On NVCC platform API throws seg fault hence skipping + HIPASSERT(hipEventElapsedTime(&tms,nullptr,end) == hipErrorInvalidHandle); + HIPASSERT(hipEventElapsedTime(&tms,start,nullptr) == hipErrorInvalidHandle); +#endif + } + + // Event created using disabled timing + { + float timeElapsed = 1.0f; + hipEvent_t start, stop; + HIPCHECK(hipEventCreateWithFlags(&start,hipEventDisableTiming)); + HIPCHECK(hipEventCreateWithFlags(&stop,hipEventDisableTiming)); + HIPASSERT(hipEventElapsedTime(&timeElapsed, start, stop) == hipErrorInvalidHandle); + } + + // events created different devices + { + int devCount = 0; + HIPCHECK(hipGetDeviceCount(&devCount)); + if (devCount > 1){ + // create event on dev=0 + HIPCHECK(hipSetDevice(0)); + hipEvent_t start; + HIPCHECK(hipEventCreate(&start)); + + // create event on dev=1 + HIPCHECK(hipSetDevice(1)); + hipEvent_t stop; + HIPCHECK(hipEventCreate(&stop)); + + HIPCHECK(hipEventRecord(start, nullptr)); + HIPCHECK(hipEventSynchronize(start)); + + HIPCHECK(hipEventRecord(stop, nullptr)); + HIPCHECK(hipEventSynchronize(stop)); + + float tElapsed = 1.0f; + HIPASSERT(hipEventElapsedTime(&tElapsed,start,stop) == hipErrorInvalidHandle); + } + } +} + +void PositiveTest(){ + hipEvent_t start; + HIPCHECK(hipEventCreate(&start)); + + hipEvent_t stop; + HIPCHECK(hipEventCreate(&stop)); + + HIPCHECK(hipEventRecord(start, nullptr)); + 
HIPCHECK(hipEventSynchronize(start)); + + HIPCHECK(hipEventRecord(stop, nullptr)); + HIPCHECK(hipEventSynchronize(stop)); + + float tElapsed = 1.0f; + HIPCHECK(hipEventElapsedTime(&tElapsed,start,stop)); +} + +int main(){ + + NegativeTests(); + PositiveTest(); + passed(); +} From 59afcb1091c96988fb31a80d6fb48ab85b8f6c24 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 6 Apr 2020 03:09:10 -0700 Subject: [PATCH 065/132] Bump version to 3.5 (#1993) * Switch CI testing from rocm-3.1.x to rocm-3.3.x * Update hcc workweek for cooperative view * bump version to 3.5 --- Jenkinsfile | 8 ++++---- bin/hipconfig | 2 +- src/hip_module.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e38f7824d2..a7cec9fd1b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -295,13 +295,13 @@ def docker_upload_dockerhub( String local_org, String image_name, String remote_ String build_config = 'Release' String job_name = env.JOB_NAME.toLowerCase( ) -// The following launches 3 builds in parallel: rocm-head, rocm-3.1.x and cuda-10.x -parallel rocm_3_1: +// The following launches 3 builds in parallel: rocm-head, rocm-3.3.x and cuda-10.x +parallel rocm_3_3: { node('hip-rocm') { - String hcc_ver = 'rocm-3.1.x' - String from_image = 'ci_test_nodes/rocm-3.1.x/ubuntu-16.04:latest' + String hcc_ver = 'rocm-3.3.x' + String from_image = 'ci_test_nodes/rocm-3.3.x/ubuntu-16.04:latest' String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video' // Checkout source code, dependencies and version files diff --git a/bin/hipconfig b/bin/hipconfig index c56b56ecd8..033908d2d3 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -1,7 +1,7 @@ #!/usr/bin/perl -w $HIP_BASE_VERSION_MAJOR = "3"; -$HIP_BASE_VERSION_MINOR = "2"; +$HIP_BASE_VERSION_MINOR = "5"; # Need perl > 5.10 to use logic-defined or use 5.006; use v5.10.1; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 192aba5da6..d331d996da 100644 --- a/src/hip_module.cpp +++ 
b/src/hip_module.cpp @@ -493,7 +493,7 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps) { -#if (__hcc_workweek__ >= 20093) +#if (__hcc_workweek__ >= 20115) hipError_t result; @@ -641,7 +641,7 @@ hipError_t hipLaunchCooperativeKernel(const void* func, dim3 gridDim, hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps) { -#if (__hcc_workweek__ >= 20093) +#if (__hcc_workweek__ >= 20115) hipError_t result; if (numDevices > g_deviceCnt || launchParamsList == nullptr || numDevices > MAX_COOPERATIVE_GPUs) { From a12cc8b031f64aaaa86539686eab12c54857241b Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 6 Apr 2020 03:09:30 -0700 Subject: [PATCH 066/132] use hsa_executable_get_symbol_by_name in find_kernel_by_name (#1994) --- src/hip_module.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index d331d996da..9692876695 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -1143,26 +1143,11 @@ inline hsa_status_t remove_agent_global_variables(hsa_executable_t, hsa_agent_t hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname, hsa_agent_t* agent = nullptr) { using namespace hip_impl; - - pair r{kname, {}}; - - hsa_executable_iterate_agent_symbols( - executable, agent ? *agent : this_agent(), - [](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* s) { - auto p = static_cast*>(s); - - if (type(x) != HSA_SYMBOL_KIND_KERNEL) { - return HSA_STATUS_SUCCESS; - } - if (name(x) != p->first) return HSA_STATUS_SUCCESS; - - p->second = x; - - return HSA_STATUS_INFO_BREAK; - }, - &r); - - return r.second; + hsa_executable_symbol_t symbol = { 0 }; + hsa_agent_t thisagent = agent ? 
*agent : this_agent(); + hsa_status_t err = hsa_executable_get_symbol_by_name(executable, kname, &thisagent ,&symbol); + //TODO check err ? + return symbol; } From 17862812b4378b8fd6fb7cadb3c32f2075713934 Mon Sep 17 00:00:00 2001 From: satyanveshd <53337087+satyanveshd@users.noreply.github.com> Date: Mon, 6 Apr 2020 15:39:49 +0530 Subject: [PATCH 067/132] fix hipIpcOpenMemHandle (#1998) --- src/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 0aee149c9f..832dcc5531 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -2513,7 +2513,7 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned crit->peerAgents(), devPtr) != HSA_STATUS_SUCCESS) return ihipLogStatus(hipErrorRuntimeOther); - hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, sizeof(*devPtr), acc, true, true); + hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, iHandle->psize, acc, true, true); am_status_t am_status = hc::am_memtracker_add(*devPtr,ampi); if (am_status != AM_SUCCESS) return ihipLogStatus(hipErrorMapFailed); From ba8a556ea9e1a34ed3a8867a85ed48cb039463f7 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 7 Apr 2020 01:32:52 -0700 Subject: [PATCH 068/132] Rename hipDrvOccupancy to hipModuleOccupancy and match CUDA syntax (#1943) --- .../hip/hcc_detail/functional_grid_launch.hpp | 18 +++-- include/hip/hcc_detail/hip_runtime_api.h | 63 ++++++++++------ include/hip/nvcc_detail/hip_runtime_api.h | 74 ++++++++++++++++--- samples/2_Cookbook/13_occupancy/occupancy.cpp | 8 +- src/hip_module.cpp | 67 ++++++++++------- .../module/hipLaunchCoopMultiKernel.cpp | 5 +- .../module/hipLaunchCooperativeKernel.cpp | 2 +- ...leOccupancyMaxPotentialActiveBlockSize.cpp | 54 ++++++++++++++ ...upancyMaxActiveBlocksPerMultiprocessor.cpp | 22 +----- .../hipOccupancyMaxPotentialBlockSize.cpp | 21 +----- 10 files changed, 222 insertions(+), 112 deletions(-) create mode 100644 
tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp rename tests/src/runtimeApi/{module => occupancy}/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp (77%) rename tests/src/runtimeApi/{module => occupancy}/hipOccupancyMaxPotentialBlockSize.cpp (75%) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index 5abe1095df..8f07e48d46 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -140,10 +140,10 @@ void hipLaunchKernelGGLImpl( } // Namespace hip_impl. -template +template inline -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, - F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { using namespace hip_impl; @@ -151,22 +151,24 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), target_agent(0)); - return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); } -template +template inline -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, F kernel, - uint32_t blockSize, size_t dynSharedMemPerBlk) { +hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { using namespace hip_impl; hip_impl::hip_init(); + if(flags != hipOccupancyDefault) return hipErrorNotSupported; auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), target_agent(0)); - return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynSharedMemPerBlk); + return 
hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + dynSharedMemPerBlk, blockSizeLimit); } template diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 9103b7e3ff..12fd9b7a91 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -266,7 +266,6 @@ typedef enum hipSharedMemConfig { ///< when adjacent threads access data 4 bytes apart. } hipSharedMemConfig; - /** * Struct for data in 3D * @@ -2940,9 +2939,28 @@ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsLi * * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue */ -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, + +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit); + int blockSizeLimit); + +/** + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue + */ +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); /** * @brief Returns occupancy for a device function. 
@@ -2953,7 +2971,7 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block */ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk); + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); /** * @brief Returns occupancy for a device function. @@ -2963,7 +2981,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( * @param [in] blockSize Block size the kernel is intended to be launched with * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block */ -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); /** @@ -2976,7 +2994,7 @@ hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( * @param [in] flags Extra flags for occupancy calculation (currently ignored) */ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); /** * @brief Returns occupancy for a device function. 
@@ -2985,9 +3003,9 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated * @param [in] blockSize Block size the kernel is intended to be launched with * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] flags Extra flags for occupancy calculation (currently ignored) + * @param [in] flags Extra flags for occupancy calculation (only default supported) */ -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); #if __HIP_VDI__ && !defined(__HCC__) @@ -3258,21 +3276,6 @@ hipError_t hipLaunchKernel(const void* function_address, } /* extern "c" */ #endif -#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) -template -static hipError_t __host__ inline hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, F func, uint32_t blockSize, size_t dynSharedMemPerBlk) { - return ::hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, (hipFunction_t)func, blockSize, - dynSharedMemPerBlk); -} -template -static hipError_t __host__ inline hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, F func, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { - return ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - numBlocks, (hipFunction_t)func, blockSize, dynSharedMemPerBlk, flags); -} -#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) - #if USE_PROF_API #include #endif @@ -3295,6 +3298,20 @@ const char* hipKernelNameRef(const hipFunction_t f); #ifdef __cplusplus +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) { + return 
hipOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk); +} + +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk, flags); +} + class TlsData; hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index d9eb3e4146..3890028950 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -1295,10 +1295,50 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBl const void* func, int blockSize, size_t dynamicSMemSize) { - cudaError_t cerror; - cerror = - cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); - return hipCUDAErrorTohipError(cerror); + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* func, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize ){ + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t 
dynamicSMemSize, + unsigned int flags ) { + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f, + blockSize, dynamicSMemSize, flags)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit, flags)); } inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { @@ -1713,19 +1753,31 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBl T func, int blockSize, size_t dynamicSMemSize) { - cudaError_t cerror; - cerror = - cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); - return hipCUDAErrorTohipError(cerror); + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); } template inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, size_t dynamicSMemSize = 0, int blockSizeLimit = 0) { - cudaError_t cerror; - cerror = cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, dynamicSMemSize, blockSizeLimit); - return hipCUDAErrorTohipError(cerror); + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit)); +} + +template +inline static hipError_t 
hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func, + size_t dynamicSMemSize = 0, + int blockSizeLimit = 0, unsigned int flags = 0) { + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit, flags)); +} + +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func, + int blockSize, size_t dynamicSMemSize,unsigned int flags) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); } template diff --git a/samples/2_Cookbook/13_occupancy/occupancy.cpp b/samples/2_Cookbook/13_occupancy/occupancy.cpp index a9f4e198b0..01fa7aafed 100644 --- a/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -56,9 +56,9 @@ void launchKernel(float* C, float* A, float* B, bool manual){ const unsigned threadsperblock = 32; const unsigned blocks = (NUM/threadsperblock)+1; - uint32_t mingridSize = 0; - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int mingridSize = 0; + int gridSize = 0; + int blockSize = 0; if (manual){ blockSize = threadsperblock; @@ -86,7 +86,7 @@ void launchKernel(float* C, float* A, float* B, bool manual){ printf("kernel Execution time = %6.3fms\n", eventMs); //Calculate Occupancy - uint32_t numBlock = 0; + int numBlock = 0; HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); if(devProp.maxThreadsPerMultiProcessor){ diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 9692876695..d0ec0df9de 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -421,7 +421,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size } static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) + TlsData *tls, 
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) { using namespace hip_impl; @@ -469,13 +469,13 @@ static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); // Calculate blocks occupancy per CU based on SGPR usage - *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); + *numBlocks = std::min(*numBlocks, (int) (sgprs_alu_occupancy / numWavefronts)); size_t total_used_lds = usedLDS + dynSharedMemPerBlk; if (total_used_lds != 0) { // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; - *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); + *numBlocks = std::min(*numBlocks, (int) lds_occupancy); } return hipSuccess; @@ -545,7 +545,7 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, std::pair>*>(kargs.getHandle()); GET_TLS(); - uint32_t numBlocksPerSm = 0; + int numBlocksPerSm = 0; result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kd, blockDim.x * blockDim.y * blockDim.z, sharedMemBytes); if (result != hipSuccess) { @@ -712,7 +712,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); - uint32_t numBlocksPerSm = 0; + int numBlocksPerSm = 0; result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kds[i], lp.blockDim.x * lp.blockDim.y * lp.blockDim.z, lp.sharedMem); if (result != hipSuccess) { @@ -1481,9 +1481,9 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const return ihipLogStatus(hipSuccess); } -hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize, +hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t 
blockSizeLimit) + int blockSizeLimit) { using namespace hip_impl; @@ -1593,51 +1593,66 @@ hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, return hipSuccess; } -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit) + int blockSizeLimit) { - HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); + return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls, + gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, gridSize, blockSize, f, dynSharedMemPerBlk, + blockSizeLimit, flags); + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); } hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); + auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f), + hip_impl::target_agent(0)); + return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( + tls, numBlocks, F, blockSize, dynSharedMemPerBlk)); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t 
f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) -{ - HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); - - return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); -} - hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); - + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); + auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f), + hip_impl::target_agent(0)); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, F, blockSize, dynSharedMemPerBlk)); } -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { - HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); return 
ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } hipError_t hipLaunchKernel( diff --git a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp index b4d57a7693..c565426f2d 100644 --- a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp @@ -101,7 +101,6 @@ int main() { uint* dA[MaxGPUs]; long* dB[MaxGPUs]; long* dC; - hipModule_t Module; hipStream_t stream[MaxGPUs]; uint32_t* init = new uint32_t[BufferSizeInDwords]; @@ -156,8 +155,8 @@ int main() { for (int i = 0; i < nGpu; i++) { HIPCHECK(hipSetDevice(i)); dimBlock.x = workgroups[set]; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(reinterpret_cast(&numBlocks), - (hipFunction_t)test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); + HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, + test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << " Num blocks per CU: " << numBlocks << "\n"; diff --git a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp index c76685fa89..896738892d 100644 --- a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp @@ -116,7 +116,7 @@ int main() { dimBlock.x = workgroups[i]; // Calculate the device occupancy to know how many blocks can be run concurrently - hipOccupancyMaxActiveBlocksPerMultiprocessor(reinterpret_cast(&numBlocks), + hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long)); dimGrid.x = deviceProp.multiProcessorCount * std::min(numBlocks, 32); diff --git 
a/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp b/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp new file mode 100644 index 0000000000..f6935d0d68 --- /dev/null +++ b/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp @@ -0,0 +1,54 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +#define fileName "vcpy_kernel.code" +#define kernel_name "hello_world" + +int main(int argc, char* argv[]) { + + int gridSize = 0; + int blockSize = 0; + int numBlock = 0; + HIPCHECK(hipInit(0)); + + hipDevice_t device; + hipCtx_t context; + HIPCHECK(hipDeviceGet(&device, 0)); + HIPCHECK(hipCtxCreate(&context, 0, device)); + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, fileName)); + HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + HIPCHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, Function, 0, 0)); + assert(gridSize != 0 && blockSize != 0); + HIPCHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, Function, blockSize, 0)); + assert(numBlock != 0); + HIPCHECK(hipCtxDestroy(context)); + passed(); +} diff --git a/tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp similarity index 77% rename from tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp rename to tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp index ebf656b72f..33ca8263e1 100644 --- a/tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp * TEST: %t * HIT_END */ @@ -30,10 +30,6 @@ THE SOFTWARE. 
#include "hip/hip_runtime.h" #include "test_common.h" -#define fileName "vcpy_kernel.code" -#define kernel_name "hello_world" - - __global__ void f1(float *a) { *a = 1.0; } template @@ -44,12 +40,12 @@ __global__ void f2(T *a) { *a = 1; } int main(int argc, char* argv[]) { // test case for using kernel function pointer - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int gridSize = 0; + int blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0); assert(gridSize != 0 && blockSize != 0); - uint32_t numBlock = 0; + int numBlock = 0; hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f1, blockSize, 0); assert(numBlock != 0); @@ -64,15 +60,5 @@ int main(int argc, char* argv[]) { hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f2, blockSize, 0); assert(numBlock != 0); - - // test case for using kernel with hipFunction_t type - numBlock = 0; - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, Function, blockSize, 0)); - assert(numBlock != 0); - passed(); } diff --git a/tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp similarity index 75% rename from tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp rename to tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp index a81862952d..fc8538df26 100644 --- a/tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp * TEST: %t * HIT_END */ @@ -30,10 +30,6 @@ THE SOFTWARE. 
#include "hip/hip_runtime.h" #include "test_common.h" -#define fileName "vcpy_kernel.code" -#define kernel_name "hello_world" - - __global__ void f1(float *a) { *a = 1.0; } template @@ -44,8 +40,8 @@ __global__ void f2(T *a) { *a = 1; } int main(int argc, char* argv[]) { // test case for using kernel function pointer - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int gridSize = 0; + int blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0); assert(gridSize != 0 && blockSize != 0); @@ -54,16 +50,5 @@ int main(int argc, char* argv[]) { blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f2, 0, 0); assert(gridSize != 0 && blockSize != 0); - - // test case for using kernel with hipFunction_t type - gridSize = 0; - blockSize = 0; - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, Function, 0, 0)); - assert(gridSize != 0 && blockSize != 0); - passed(); } From ff8d193b7ffe055b99a827b6e168687f637f66a8 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 7 Apr 2020 11:33:19 +0300 Subject: [PATCH 069/132] [HIP][doc] Update docs due to moving of HIPIFY to a separate repo (#2001) --- README.md | 6 ++-- docs/doxygen-input/mainpage.txt | 2 +- docs/markdown/hip_faq.md | 30 ++++++++--------- docs/markdown/hip_kernel_language.md | 4 +-- docs/markdown/hip_porting_driver_api.md | 4 +-- docs/markdown/hip_porting_guide.md | 32 +++++++++---------- docs/markdown/hip_terms2.md | 12 +++---- packaging/convert_md_to_html.sh | 8 ++--- samples/0_Intro/square/README.md | 2 +- .../2_Cookbook/0_MatrixTranspose/Readme.md | 2 +- samples/2_Cookbook/10_inline_asm/Readme.md | 2 +- .../12_cmake_hip_add_executable/Readme.md | 2 +- samples/2_Cookbook/1_hipEvent/Readme.md | 2 +- samples/2_Cookbook/3_shared_memory/Readme.md | 2 +- samples/2_Cookbook/4_shfl/Readme.md | 2 +- 
samples/2_Cookbook/5_2dshfl/Readme.md | 2 +- samples/2_Cookbook/6_dynamic_shared/Readme.md | 2 +- samples/2_Cookbook/7_streams/Readme.md | 2 +- samples/2_Cookbook/9_unroll/Readme.md | 2 +- 19 files changed, 58 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index c2e2a7a456..13e7c8f335 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Key features include: * HIP is very thin and has little or no performance impact over coding directly in CUDA or hcc "HC" mode. * HIP allows coding in a single-source C++ programming language including features such as templates, C++11 lambdas, classes, namespaces, and more. * HIP allows developers to use the "best" development environment and tools on each target platform. -* The [HIPIFY](hipify-clang/README.md) tools automatically convert source from CUDA to HIP. +* The [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) tools automatically convert source from CUDA to HIP. * Developers can specialize for the platform (CUDA or hcc) to tune for performance or handle tricky cases New projects can be developed directly in the portable HIP C++ language and can run on either NVIDIA or AMD platforms. Additionally, HIP provides porting tools which make it easy to port existing CUDA codes to the HIP layer, with no loss of performance as compared to the original CUDA application. HIP is not intended to be a drop-in replacement for CUDA, and developers should expect to do some manual coding and performance tuning work to complete the port. @@ -37,7 +37,7 @@ HIP releases are typically of two types. 
The tag naming convention is different - [HIP Profiling ](docs/markdown/hip_profiling.md) - [HIP Debugging](docs/markdown/hip_debugging.md) - [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL) -- [HIPIFY](hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - Supported CUDA APIs: * [Runtime API](docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md) * [Driver API](docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md) @@ -115,7 +115,7 @@ provides source portability to either platform. HIP provides the _hipcc_ compi ## Examples and Getting Started: -* A sample and [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that uses any of [HIPIFY](hipify-clang/README.md) tools to convert a simple app from CUDA to HIP: +* A sample and [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that uses any of [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) tools to convert a simple app from CUDA to HIP: ```shell diff --git a/docs/doxygen-input/mainpage.txt b/docs/doxygen-input/mainpage.txt index 48177198cc..b5dcae3956 100644 --- a/docs/doxygen-input/mainpage.txt +++ b/docs/doxygen-input/mainpage.txt @@ -10,7 +10,7 @@ * - HIP is very thin and has little or no performance impact over coding directly in CUDA NVCC or HCC HC mode. * - HIP allows developers to use the "best" development environment and tools on each target platform. * - HIP allows coding in a single-source C++ programming language including features such as templates, C++11 lambdas,and more. - * - "hipify" tool automatically converts source from CUDA to HIP. + * - "HIPIFY" tools automatically convert CUDA sources to HIP. * - Developers can specialize for CUDA or HCC to tune for performance or handle tricky cases with #ifdef. * - See the @ref API. 
diff --git a/docs/markdown/hip_faq.md b/docs/markdown/hip_faq.md index 0bbb797cc1..f8a7489fb7 100644 --- a/docs/markdown/hip_faq.md +++ b/docs/markdown/hip_faq.md @@ -12,7 +12,7 @@ - [How does HIP compare with OpenCL?](#how-does-hip-compare-with-opencl) - [How does porting CUDA to HIP compare to porting CUDA to OpenCL?](#how-does-porting-cuda-to-hip-compare-to-porting-cuda-to-opencl) - [What hardware does HIP support?](#what-hardware-does-hip-support) -- [Does Hipify automatically convert all source code?](#does-hipify-automatically-convert-all-source-code) +- [Do HIPIFY tools automatically convert all source code?](#do-hipify-tools-automatically-convert-all-source-code) - [What is NVCC?](#what-is-nvcc) - [What is HCC?](#what-is-hcc) - [Why use HIP rather than supporting CUDA directly?](#why-use-hip-rather-than-supporting-cuda-directly) @@ -79,23 +79,23 @@ scan code to identify any unsupported CUDA functions - this is useful for identi However, we can provide a rough summary of the features included in each CUDA SDK and the support level in HIP. Each bullet below lists the major new language features in each CUDA release and then indicate which are supported/not supported in HIP: -- CUDA 4.0 and earlier : +- CUDA 4.0 and earlier : - HIP supports CUDA 4.0 except for the limitations described above. -- CUDA 5.0 : +- CUDA 5.0 : - Dynamic Parallelism (not supported) - cuIpc functions (under development). 
-- CUDA 5.5 : +- CUDA 5.5 : - CUPTI (not directly supported, [AMD GPUPerfAPI](http://developer.amd.com/tools-and-sdks/graphics-development/gpuperfapi/) can be used as an alternative in some cases) -- CUDA 6.0 +- CUDA 6.0 : - Managed memory (under development) -- CUDA 6.5 +- CUDA 6.5 : - __shfl intriniscs (supported) -- CUDA 7.0 +- CUDA 7.0 : - Per-thread-streams (under development) - C++11 (HCC supports all of C++11, all of C++14 and some C++17 features) -- CUDA 7.5 +- CUDA 7.5 : - float16 (supported) -- CUDA 8.0 +- CUDA 8.0 : - Page Migration including cudaMemAdvise, cudaMemPrefetch, other cudaMem* APIs(not supported) @@ -108,8 +108,8 @@ The hip interfaces support both ROCm and CUDA paths, with familiar library inter - [hipfft](https://github.com/ROCmSoftwarePlatform/hcFFT) - [hipsparse](https://github.com/ROCmSoftwarePlatform/hcSPARSE) - [hiprng](https://github.com/ROCmSoftwarePlatform/hcrng) - -Additionally, some of the cublas routines are automatically converted to hipblas equivalents by the hipify-clang tool. These APIs use cublas or hcblas depending on the platform and replace the need + +Additionally, some of the cublas routines are automatically converted to hipblas equivalents by the HIPIFY tools. These APIs use cublas or hcblas depending on the platform and replace the need to use conditional compilation. ### How does HIP compare with OpenCL? @@ -126,7 +126,7 @@ HIP offers several benefits over OpenCL: ### How does porting CUDA to HIP compare to porting CUDA to OpenCL? Both HIP and CUDA are dialects of C++, and thus porting between them is relatively straightforward. Both dialects support templates, classes, lambdas, and other C++ constructs. -As one example, the hipify tool was originally a Perl script that used simple text conversions from CUDA to HIP. +As one example, the hipify-perl tool was originally a Perl script that used simple text conversions from CUDA to HIP. HIP and CUDA provide similar math library calls as well. 
In summary, the HIP philosophy was to make the HIP language close enough to CUDA that the porting effort is relatively simple. This reduces the potential for error, and also makes it easy to automate the translation. HIP's goal is to quickly get the ported program running on both platforms with little manual intervention, so that the programmer can focus on performance optimizations. @@ -140,11 +140,11 @@ The tools also struggle with more complex CUDA applications, in particular, thos - For AMD platforms, HIP runs on the same hardware that the HCC "hc" mode supports. See the ROCm documentation for the list of supported platforms. - For Nvidia platforms, HIP requires Unified Memory and should run on any device supporting CUDA SDK 6.0 or newer. We have tested the Nvidia Titan and Tesla K40. -### Does Hipify automatically convert all source code? -Typically, hipify can automatically convert almost all run-time code, and the coordinate indexing device code ( threadIdx.x -> hipThreadIdx_x ). +### Do HIPIFY tools automatically convert all source code? +Typically, HIPIFY tools can automatically convert almost all run-time code, and the coordinate indexing device code ( threadIdx.x -> hipThreadIdx_x ). Most device code needs no additional conversion since HIP and CUDA have similar names for math and built-in functions. The hipify-clang tool will automatically modify the kernel signature as needed (automating a step that used to be done manually). -Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support. +Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support. In general, developers should always expect to perform some platform-specific tuning and optimization. ### What is NVCC? 
diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md index 04847101f6..395a343764 100644 --- a/docs/markdown/hip_kernel_language.md +++ b/docs/markdown/hip_kernel_language.md @@ -125,7 +125,7 @@ MyKernel<<>> (a,b,c,n); ``` -The hipLaunchKernel macro always starts with the five parameters specified above, followed by the kernel arguments. The Hipify script optionally converts Cuda launch syntax to hipLaunchKernel, including conversion of optional arguments in <<< >>> to the five required hipLaunchKernel parameters. The dim3 constructor accepts zero to three arguments and will by default initialize unspecified dimensions to 1. See [dim3](#dim3). The kernel uses the coordinate built-ins (hipThread*, hipBlock*, hipGrid*) to determine coordinate index and coordinate bounds of the work item that’s currently executing. See [Coordinate Built-Ins](#coordinate-builtins). +The hipLaunchKernel macro always starts with the five parameters specified above, followed by the kernel arguments. HIPIFY tools optionally convert Cuda launch syntax to hipLaunchKernel, including conversion of optional arguments in <<< >>> to the five required hipLaunchKernel parameters. The dim3 constructor accepts zero to three arguments and will by default initialize unspecified dimensions to 1. See [dim3](#dim3). The kernel uses the coordinate built-ins (hipThread*, hipBlock*, hipGrid*) to determine coordinate index and coordinate bounds of the work item that’s currently executing. See [Coordinate Built-Ins](#coordinate-builtins). 
## Kernel-Launch Example @@ -724,7 +724,7 @@ CUDA defines a __launch_bounds which is also designed to control occupancy: __launch_bounds(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MULTIPROCESSOR) ``` -- The second parameter __launch_bounds parameters must be converted to the format used __hip_launch_bounds, which uses warps and execution-units rather than blocks and multi-processors ( This conversion is performed automatically by the clang hipify tools.) +- The second __launch_bounds parameter must be converted to the format used by __hip_launch_bounds, which uses warps and execution-units rather than blocks and multi-processors (this conversion is performed automatically by hipify tools). ``` MIN_WARPS_PER_EXECUTION_UNIT = (MIN_BLOCKS_PER_MULTIPROCESSOR * MAX_THREADS_PER_BLOCK) / 32 ``` diff --git a/docs/markdown/hip_porting_driver_api.md b/docs/markdown/hip_porting_driver_api.md index 8e66780add..af70f35bb6 100644 --- a/docs/markdown/hip_porting_driver_api.md +++ b/docs/markdown/hip_porting_driver_api.md @@ -68,8 +68,8 @@ HIP provides a `Ctx` API as a thin layer over the existing Device functions. Thi The current context is implicitly used by other APIs such as `hipStreamCreate`. ### hipify translation of CUDA Driver API -The hipify tool converts CUDA Driver APIs for streams, events, modules, devices, memory management, context, profiler to the equivalent HIP driver calls. For example, `cuEventCreate` will be translated to `hipEventCreate`. -Hipify also converts error code from the Driver namespace and coding convention to the equivalent HIP error code. Thus, HIP unifies the APIs for these common functions. +The HIPIFY tools convert CUDA Driver APIs for streams, events, modules, devices, memory management, context, profiler to the equivalent HIP driver calls. For example, `cuEventCreate` will be translated to `hipEventCreate`. +HIPIFY tools also convert error codes from the Driver namespace and coding convention to the equivalent HIP error codes. 
Thus, HIP unifies the APIs for these common functions. The memory copy API requires additional explanation. The CUDA driver includes the memory direction in the name of the API (ie `cuMemcpyH2D`) while the CUDA driver API provides a single memory copy API with a parameter that specifies the direction and additionally supports a "default" direction where the runtime determines the direction automatically. HIP provides APIs with both styles: for example, `hipMemcpyH2D` as well as `hipMemcpy`. diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index 4855fa4cea..c291fa8ae6 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -1,7 +1,7 @@ -# HIP Porting Guide +# HIP Porting Guide In addition to providing a portable C++ programming environment for GPUs, HIP is designed to ease the porting of existing CUDA code into the HIP environment. This section describes the available tools -and provides practical suggestions on how to port CUDA code and work through common issues. +and provides practical suggestions on how to port CUDA code and work through common issues. ## Table of Contents @@ -57,13 +57,13 @@ and provides practical suggestions on how to port CUDA code and work through com - Starting the port on a Cuda machine is often the easiest approach, since you can incrementally port pieces of the code to HIP while leaving the rest in Cuda. (Recall that on Cuda machines HIP is just a thin layer over Cuda, so the two code types can interoperate on nvcc platforms.) Also, the HIP port can be compared with the original Cuda code for function and performance. - Once the Cuda code is ported to HIP and is running on the Cuda machine, compile the HIP code using hcc on an AMD machine. - HIP ports can replace Cuda versions: HIP can deliver the same performance as a native Cuda implementation, with the benefit of portability to both Nvidia and AMD architectures as well as a path to future C++ standard support. 
You can handle platform-specific features through conditional compilation or by adding them to the open-source HIP infrastructure. -- Use **[bin/hipconvertinplace.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace.sh)** to hipify all code files in the Cuda source directory. +- Use **[bin/hipconvertinplace-perl.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace-perl.sh)** to hipify all code files in the Cuda source directory. ### Scanning existing CUDA code to scope the porting effort -The hipexamine.sh tool will scan a source directory to determine which files contain CUDA code and how much of that code can be automatically hipified, +The hipexamine-perl.sh tool will scan a source directory to determine which files contain CUDA code and how much of that code can be automatically hipified, ``` > cd examples/rodinia_3.0/cuda/kmeans -> $HIP_DIR/bin/hipexamine.sh . +> $HIP_DIR/bin/hipexamine-perl.sh . info: hipify ./kmeans.h =====> info: hipify ./unistd.h =====> info: hipify ./kmeans.c =====> @@ -80,7 +80,7 @@ info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 s kernels (1 total) : kmeansPoint(1) ``` -hipexamine scans each code file (cpp, c, h, hpp, etc.) found in the specified directory: +hipexamine-perl scans each code file (cpp, c, h, hpp, etc.) found in the specified directory: * Files with no CUDA code (ie kmeans.h) print one line summary just listing the source file name. * Files with CUDA code print a summary of what was found - for example the kmeans_cuda_kernel.cu file: @@ -94,7 +94,7 @@ info: hipify ./kmeans_cuda_kernel.cu =====> * Warning for code that looks like CUDA API but was not converted (0 in this file). * Count Lines-of-Code (LOC) - 185 for this file. -* hipexamine also presents a summary at the end of the process for the statistics collected across all files. 
This has similar format to the per-file reporting, and also includes a list of all kernels which have been called. An example from above: +* hipexamine-perl also presents a summary at the end of the process for the statistics collected across all files. This has similar format to the per-file reporting, and also includes a list of all kernels which have been called. An example from above: ```shell info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 stream:0 event:0 err:0 def:0 tex:15 other:0 ) warn:0 LOC:3607 @@ -104,24 +104,24 @@ info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 s ### Converting a project "in-place" ```shell -> hipify --inplace +> hipify-perl --inplace ``` For each input file FILE, this script will: - - If "FILE.prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. - - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. + - If "FILE.prehip" file does not exist, copy the original code to a new file with extension ".prehip". Then hipify the code file. + - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. This is useful for testing improvements to the hipify toolset. -The [hipconvertinplace.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace.sh) script will perform inplace conversion for all code files in the specified directory. +The [hipconvertinplace-perl.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace-perl.sh) script will perform inplace conversion for all code files in the specified directory. This can be quite handy when dealing with an existing CUDA code base since the script preserves the existing directory structure and filenames - and includes work. After converting in-place, you can review the code to add additional parameters to directory names. 
```shell -> hipconvertinplace.sh MY_SRC_DIR +> hipconvertinplace-perl.sh MY_SRC_DIR ``` ### Library Equivalents @@ -402,11 +402,11 @@ You can capture the hipconfig output and passed it to the standard compiler; bel CPPFLAGS += $(shell $(HIP_PATH)/bin/hipconfig --cpp_config) ``` -nvcc includes some headers by default. However, HIP does not include default headers, and instead all required files must be explicitly included. -Specifically, files that call HIP run-time APIs or define HIP kernels must explicitly include the appropriate HIP headers. +nvcc includes some headers by default. However, HIP does not include default headers, and instead all required files must be explicitly included. +Specifically, files that call HIP run-time APIs or define HIP kernels must explicitly include the appropriate HIP headers. If the compilation process reports that it cannot find necessary APIs (for example, "error: identifier ‘hipSetDevice’ is undefined"), -ensure that the file includes hip_runtime.h (or hip_runtime_api.h, if appropriate). -The hipify script automatically converts "cuda_runtime.h" to "hip_runtime.h," and it converts "cuda_runtime_api.h" to "hip_runtime_api.h", but it may miss nested headers or macros. +ensure that the file includes hip_runtime.h (or hip_runtime_api.h, if appropriate). +The hipify-perl script automatically converts "cuda_runtime.h" to "hip_runtime.h," and it converts "cuda_runtime_api.h" to "hip_runtime_api.h", but it may miss nested headers or macros. #### cuda.h diff --git a/docs/markdown/hip_terms2.md b/docs/markdown/hip_terms2.md index be859ffb32..3b4661729d 100644 --- a/docs/markdown/hip_terms2.md +++ b/docs/markdown/hip_terms2.md @@ -1,18 +1,18 @@ # Terms used in HIP Documentation -- host, host cpu : Executes the HIP runtime API and is capable of initiating kernel launches to one or more devices. -- default device : Each host thread maintains a "default device". 
+- host, host cpu : Executes the HIP runtime API and is capable of initiating kernel launches to one or more devices. +- default device : Each host thread maintains a "default device". Most HIP runtime APIs (including memory allocation, copy commands, kernel launches) do not use accept an explicit device argument but instead implicitly use the default device. The default device can be set with hipSetDevice. -- "active host thread" - the thread which is running the HIP APIs. +- "active host thread" - the thread which is running the HIP APIs. -- completion_future becomes ready. "Completes" +- completion_future becomes ready. "Completes". -- hcc = Heterogeneous Compute Compiler (https://bitbucket.org/multicoreware/hcc/wiki/Home). +- hcc = Heterogeneous Compute Compiler (https://bitbucket.org/multicoreware/hcc/wiki/Home). -- hipify - tool to convert CUDA(R) code to portable C++ code. +- hipify tools - tools to convert CUDA(R) code to portable C++ code (https://github.com/ROCm-Developer-Tools/HIPIFY). - hipconfig - tool to report various configuration properties of the target platform. - nvcc = nvcc compiler, do not capitalize. 
diff --git a/packaging/convert_md_to_html.sh b/packaging/convert_md_to_html.sh index e6442a9ef2..fc222914bb 100755 --- a/packaging/convert_md_to_html.sh +++ b/packaging/convert_md_to_html.sh @@ -21,24 +21,22 @@ trap cleanup EXIT export GRIPURL=$hip_srcdir export GRIPHOME=$workdir echo "CACHE_DIRECTORY = '$html_destdir/asset'" > $workdir/settings.py -mkdir -p $html_destdir $html_destdir/hipify-clang $html_destdir/docs/markdown +mkdir -p $html_destdir $html_destdir/docs/markdown # convert all md files to html pushd $hip_srcdir -for f in *.md hipify-clang/*.md docs/markdown/*.md; do grip --export --no-inline $f $html_destdir/${f%.*}.html; done +for f in *.md docs/markdown/*.md; do grip --export --no-inline $f $html_destdir/${f%.*}.html; done popd # convert absolute links to relative links pushd $html_destdir for f in *.html; do sed -i "s?$GRIPURL/??g" $f; done -for f in hipify-clang/*.html; do sed -i "s?$GRIPURL/?../?g" $f; done for f in docs/markdown/*.html; do sed -i "s?$GRIPURL/?../../?g" $f; done popd # update document titles pushd $html_destdir for f in *.html; do sed -i "s?.md - Grip??g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md - Grip??g" $f; done for f in docs/markdown/*.html; do sed -i "s?.md - Grip??g" $f; done popd @@ -46,8 +44,6 @@ popd pushd $html_destdir for f in *.html; do sed -i "s?.md\"?.html\"?g" $f; done for f in *.html; do sed -i "s?.md#?.html#?g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md\"?.html\"?g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md#?.html#?g" $f; done for f in docs/markdown/*.html; do sed -i "s?.md\"?.html\"?g" $f; done for f in docs/markdown/*.html; do sed -i "s?.md#?.html#?g" $f; done popd diff --git a/samples/0_Intro/square/README.md b/samples/0_Intro/square/README.md index 7a9e04fc5f..c185903993 100644 --- a/samples/0_Intro/square/README.md +++ b/samples/0_Intro/square/README.md @@ -1,6 +1,6 @@ # Square.md -Simple test which shows how to use hipify to port CUDA code to HIP. 
+Simple test which shows how to use hipify-perl to port CUDA code to HIP. See related [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that explains the example. Now it is even simpler and requires no manual modification to the hipified source code - just hipify and compile: diff --git a/samples/2_Cookbook/0_MatrixTranspose/Readme.md b/samples/2_Cookbook/0_MatrixTranspose/Readme.md index 9e1a342a07..432f9180dc 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/Readme.md +++ b/samples/2_Cookbook/0_MatrixTranspose/Readme.md @@ -96,6 +96,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/10_inline_asm/Readme.md b/samples/2_Cookbook/10_inline_asm/Readme.md index f65bbdcf20..e86085b648 100644 --- a/samples/2_Cookbook/10_inline_asm/Readme.md +++ b/samples/2_Cookbook/10_inline_asm/Readme.md @@ -55,6 +55,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP 
Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md index 1430e58ecc..937da30af0 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md @@ -48,6 +48,6 @@ make - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/1_hipEvent/Readme.md b/samples/2_Cookbook/1_hipEvent/Readme.md index c12c76e701..2bd389e25e 100644 --- a/samples/2_Cookbook/1_hipEvent/Readme.md +++ b/samples/2_Cookbook/1_hipEvent/Readme.md @@ -75,6 +75,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting 
Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/3_shared_memory/Readme.md b/samples/2_Cookbook/3_shared_memory/Readme.md index ad23d58f73..756cb6e7f2 100644 --- a/samples/2_Cookbook/3_shared_memory/Readme.md +++ b/samples/2_Cookbook/3_shared_memory/Readme.md @@ -37,6 +37,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/4_shfl/Readme.md b/samples/2_Cookbook/4_shfl/Readme.md index 6adc98fb4e..ac5dff9292 100644 --- a/samples/2_Cookbook/4_shfl/Readme.md +++ b/samples/2_Cookbook/4_shfl/Readme.md @@ -48,6 +48,6 @@ please make sure you have a 3.0 or higher compute 
capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/5_2dshfl/Readme.md b/samples/2_Cookbook/5_2dshfl/Readme.md index cc9484377b..fa10c71d6c 100644 --- a/samples/2_Cookbook/5_2dshfl/Readme.md +++ b/samples/2_Cookbook/5_2dshfl/Readme.md @@ -50,6 +50,6 @@ please make sure you have a 3.0 or higher compute capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/6_dynamic_shared/Readme.md b/samples/2_Cookbook/6_dynamic_shared/Readme.md index 047cc94278..68782807bf 100644 --- 
a/samples/2_Cookbook/6_dynamic_shared/Readme.md +++ b/samples/2_Cookbook/6_dynamic_shared/Readme.md @@ -44,6 +44,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/7_streams/Readme.md b/samples/2_Cookbook/7_streams/Readme.md index 1c9186791c..14b6a9762a 100644 --- a/samples/2_Cookbook/7_streams/Readme.md +++ b/samples/2_Cookbook/7_streams/Readme.md @@ -58,6 +58,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git 
a/samples/2_Cookbook/9_unroll/Readme.md b/samples/2_Cookbook/9_unroll/Readme.md index c6b8a8cf35..6fad55e3c9 100644 --- a/samples/2_Cookbook/9_unroll/Readme.md +++ b/samples/2_Cookbook/9_unroll/Readme.md @@ -43,6 +43,6 @@ please make sure you have a 3.0 or higher compute capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) From 5f1420a229b41662f1f3f178de56daa10028d39c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 7 Apr 2020 11:33:30 +0300 Subject: [PATCH 070/132] [HIP][HIPIFY] Remove HIPIFY related stuff due to moving of HIPIFY to a separate repo (#2002) HIPIFY tools now are here: https://github.com/ROCm-Developer-Tools/HIPIFY --- hipify-clang/CMakeLists.txt | 198 -- hipify-clang/README.md | 507 ------ hipify-clang/packaging/hipify-clang.txt | 58 - .../patch_for_clang_7.0.0_bug_38811.zip | Bin 14641 -> 0 bytes .../patch_for_clang_7.0.1_bug_38811.zip | Bin 14641 -> 0 bytes .../patch_for_clang_7.1.0_bug_38811.zip | Bin 14641 -> 0 bytes .../patch_for_clang_8.0.0_bug_38811.zip | Bin 14641 -> 0 bytes .../patch_for_clang_8.0.1_bug_38811.zip | Bin 14641 -> 0 bytes hipify-clang/src/ArgParse.cpp | 149 -- hipify-clang/src/ArgParse.h | 55 - hipify-clang/src/CUDA2HIP.cpp | 110 -- hipify-clang/src/CUDA2HIP.h | 81 - .../src/CUDA2HIP_BLAS_API_functions.cpp | 671 ------- 
hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp | 158 -- .../src/CUDA2HIP_CAFFE2_API_functions.cpp | 28 - .../src/CUDA2HIP_CAFFE2_API_types.cpp | 34 - hipify-clang/src/CUDA2HIP_CUB_API_types.cpp | 28 - .../src/CUDA2HIP_Complex_API_functions.cpp | 50 - .../src/CUDA2HIP_Complex_API_types.cpp | 30 - .../src/CUDA2HIP_DNN_API_functions.cpp | 299 --- hipify-clang/src/CUDA2HIP_DNN_API_types.cpp | 391 ---- .../src/CUDA2HIP_Device_functions.cpp | 616 ------- .../src/CUDA2HIP_Driver_API_functions.cpp | 815 --------- .../src/CUDA2HIP_Driver_API_types.cpp | 1617 ----------------- .../src/CUDA2HIP_FFT_API_functions.cpp | 59 - hipify-clang/src/CUDA2HIP_FFT_API_types.cpp | 71 - hipify-clang/src/CUDA2HIP_Perl.cpp | 488 ----- hipify-clang/src/CUDA2HIP_Python.cpp | 103 -- .../src/CUDA2HIP_RAND_API_functions.cpp | 86 - hipify-clang/src/CUDA2HIP_RAND_API_types.cpp | 140 -- .../src/CUDA2HIP_Runtime_API_functions.cpp | 693 ------- .../src/CUDA2HIP_Runtime_API_types.cpp | 1426 --------------- .../src/CUDA2HIP_SPARSE_API_functions.cpp | 758 -------- .../src/CUDA2HIP_SPARSE_API_types.cpp | 187 -- hipify-clang/src/CUDA2HIP_Scripting.h | 41 - hipify-clang/src/HipifyAction.cpp | 755 -------- hipify-clang/src/HipifyAction.h | 110 -- hipify-clang/src/LLVMCompat.cpp | 154 -- hipify-clang/src/LLVMCompat.h | 94 - .../src/ReplacementsFrontendActionFactory.h | 55 - hipify-clang/src/Statistics.cpp | 368 ---- hipify-clang/src/Statistics.h | 250 --- hipify-clang/src/StringUtils.cpp | 97 - hipify-clang/src/StringUtils.h | 48 - hipify-clang/src/main.cpp | 352 ---- tests/hipify-clang/lit.cfg | 151 -- tests/hipify-clang/lit.site.cfg.in | 38 - tests/hipify-clang/run_test.bat | 21 - tests/hipify-clang/run_test.sh | 18 - .../unit_tests/casts/reinterpret_cast.cu | 52 - .../hipify-clang/unit_tests/device/atomics.cu | 286 --- .../unit_tests/device/device_symbols.cu | 152 -- .../unit_tests/device/math_functions.cu | 58 - .../unit_tests/headers/headers_test_01.cu | 8 - .../unit_tests/headers/headers_test_02.cu | 8 - 
.../unit_tests/headers/headers_test_03.cu | 10 - .../unit_tests/headers/headers_test_04.cu | 12 - .../unit_tests/headers/headers_test_05.cu | 12 - .../unit_tests/headers/headers_test_06.cu | 8 - .../unit_tests/headers/headers_test_07.cu | 8 - .../unit_tests/headers/headers_test_08.cu | 14 - .../unit_tests/headers/headers_test_09.cu | 100 - .../unit_tests/headers/headers_test_10.cu | 14 - .../unit_tests/headers/headers_test_11.cu | 14 - .../kernel_launch/kernel_launch_01.cu | 46 - .../CAFFE2/caffe2/core/common_cudnn.h | 7 - .../caffe2/operators/spatial_batch_norm_op.h | 14 - .../unit_tests/libraries/CAFFE2/caffe2_01.cu | 12 - .../unit_tests/libraries/CAFFE2/caffe2_02.cu | 102 -- .../unit_tests/libraries/CUB/cub_01.cu | 60 - .../unit_tests/libraries/CUB/cub_02.cu | 69 - .../unit_tests/libraries/CUB/cub_03.cu | 33 - .../cuBLAS/cublas_0_based_indexing.cu | 81 - .../cuBLAS/cublas_1_based_indexing.cu | 90 - .../cublas_sgemm_matrix_multiplication.cu | 108 -- .../cublas_0_based_indexing_rocblas.cu | 81 - .../cublas_1_based_indexing_rocblas.cu | 90 - ...las_sgemm_matrix_multiplication_rocblas.cu | 108 -- .../libraries/cuComplex/cuComplex_Julia.cu | 58 - .../cuDNN/cudnn_convolution_forward.cu | 267 --- .../libraries/cuDNN/cudnn_softmax.cu | 159 -- .../libraries/cuFFT/simple_cufft.cu | 78 - .../cuRAND/benchmark_curand_generate.cpp | 346 ---- .../cuRAND/benchmark_curand_kernel.cpp | 673 ------- .../unit_tests/libraries/cuRAND/cmdparser.hpp | 494 ----- .../libraries/cuRAND/poisson_api_example.cu | 416 ----- .../libraries/cuSPARSE/cuSPARSE_01.cu | 367 ---- .../libraries/cuSPARSE/cuSPARSE_02.cu | 284 --- .../libraries/cuSPARSE/cuSPARSE_03.cu | 229 --- .../libraries/cuSPARSE/cuSPARSE_04.cu | 261 --- .../libraries/cuSPARSE/cuSPARSE_05.cu | 288 --- .../libraries/cuSPARSE/cuSPARSE_06.cu | 269 --- .../libraries/cuSPARSE/cuSPARSE_07.cu | 302 --- .../libraries/cuSPARSE/cuSPARSE_08.cu | 413 ----- .../libraries/cuSPARSE/cuSPARSE_09.cu | 414 ----- .../libraries/cuSPARSE/cuSPARSE_10.cu | 
507 ------ .../libraries/cuSPARSE/cuSPARSE_11.cu | 327 ---- .../libraries/cuSPARSE/cuSPARSE_12.cu | 410 ----- .../unit_tests/namespace/ns_kernel_launch.cu | 28 - .../unit_tests/pp/pp_if_else_conditionals.cu | 30 - .../pp/pp_if_else_conditionals_01.cu | 52 - .../pp/pp_if_else_conditionals_01_LLVM_10.cu | 52 - .../pp/pp_if_else_conditionals_LLVM_10.cu | 30 - .../0_MatrixTranspose/MatrixTranspose.cpp | 130 -- .../11_texture_driver/tex2dKernel.cpp | 36 - .../11_texture_driver/texture2dDrv.cpp | 169 -- .../2_Cookbook/13_occupancy/occupancy.cpp | 198 -- .../2_Cookbook/1_hipEvent/hipEvent.cpp | 181 -- .../2_Cookbook/2_Profiler/Profiler.cpp | 250 --- .../samples/2_Cookbook/7_streams/stream.cpp | 153 -- .../2_Cookbook/8_peer2peer/peer2peer.cpp | 225 --- .../unit_tests/samples/MallocManaged.cpp | 45 - .../unit_tests/samples/allocators.cu | 53 - tests/hipify-clang/unit_tests/samples/axpy.cu | 97 - .../unit_tests/samples/coalescing.cu | 117 -- .../unit_tests/samples/cudaRegister.cu | 106 -- .../samples/dynamic_shared_memory.cu | 45 - .../hipify-clang/unit_tests/samples/intro.cu | 174 -- .../hipify-clang/unit_tests/samples/square.cu | 112 -- .../samples/static_shared_memory.cu | 45 - .../unit_tests/samples/vec_add.cu | 89 - 121 files changed, 23094 deletions(-) delete mode 100644 hipify-clang/CMakeLists.txt delete mode 100644 hipify-clang/README.md delete mode 100644 hipify-clang/packaging/hipify-clang.txt delete mode 100644 hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip delete mode 100644 hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip delete mode 100644 hipify-clang/patches/patch_for_clang_7.1.0_bug_38811.zip delete mode 100644 hipify-clang/patches/patch_for_clang_8.0.0_bug_38811.zip delete mode 100644 hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip delete mode 100644 hipify-clang/src/ArgParse.cpp delete mode 100644 hipify-clang/src/ArgParse.h delete mode 100644 hipify-clang/src/CUDA2HIP.cpp delete mode 100644 hipify-clang/src/CUDA2HIP.h delete 
mode 100644 hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_CUB_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Complex_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_DNN_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Device_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Driver_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_FFT_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Perl.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Python.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_RAND_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp delete mode 100644 hipify-clang/src/CUDA2HIP_Scripting.h delete mode 100644 hipify-clang/src/HipifyAction.cpp delete mode 100644 hipify-clang/src/HipifyAction.h delete mode 100644 hipify-clang/src/LLVMCompat.cpp delete mode 100644 hipify-clang/src/LLVMCompat.h delete mode 100644 hipify-clang/src/ReplacementsFrontendActionFactory.h delete mode 100644 hipify-clang/src/Statistics.cpp delete mode 100644 hipify-clang/src/Statistics.h delete mode 100644 hipify-clang/src/StringUtils.cpp delete mode 100644 hipify-clang/src/StringUtils.h delete mode 100644 
hipify-clang/src/main.cpp delete mode 100644 tests/hipify-clang/lit.cfg delete mode 100644 tests/hipify-clang/lit.site.cfg.in delete mode 100644 tests/hipify-clang/run_test.bat delete mode 100755 tests/hipify-clang/run_test.sh delete mode 100644 tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu delete mode 100644 tests/hipify-clang/unit_tests/device/atomics.cu delete mode 100644 tests/hipify-clang/unit_tests/device/device_symbols.cu delete mode 100644 tests/hipify-clang/unit_tests/device/math_functions.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_01.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_02.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_03.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_04.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_05.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_06.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_07.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_08.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_09.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_10.cu delete mode 100644 tests/hipify-clang/unit_tests/headers/headers_test_11.cu delete mode 100644 tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h delete mode 100644 tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h delete mode 100644 tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu delete mode 100644 
tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuComplex/cuComplex_Julia.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu delete mode 100644 
tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu delete mode 100644 tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu delete mode 100644 tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu delete mode 100644 tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu delete mode 100644 tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu delete mode 100644 tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu delete mode 100644 tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/MallocManaged.cpp delete mode 100644 tests/hipify-clang/unit_tests/samples/allocators.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/axpy.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/coalescing.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/cudaRegister.cu delete mode 100644 
tests/hipify-clang/unit_tests/samples/dynamic_shared_memory.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/intro.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/square.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/static_shared_memory.cu delete mode 100644 tests/hipify-clang/unit_tests/samples/vec_add.cu diff --git a/hipify-clang/CMakeLists.txt b/hipify-clang/CMakeLists.txt deleted file mode 100644 index 875b5dad74..0000000000 --- a/hipify-clang/CMakeLists.txt +++ /dev/null @@ -1,198 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1) - -project(hipify-clang) - -if (MSVC AND MSVC_VERSION VERSION_LESS "1900") - message(SEND_ERROR "hipify-clang could be built by Visual Studio 14 2015 or higher.") - return() -endif() - -find_package(LLVM REQUIRED) -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}:") -message(STATUS " - CMake module path: ${LLVM_CMAKE_DIR}") -message(STATUS " - Include path : ${LLVM_INCLUDE_DIRS}") -message(STATUS " - Binary path : ${LLVM_TOOLS_BINARY_DIR}") - -option(HIPIFY_CLANG_TESTS "Build the tests for hipify-clang, if lit is installed" OFF) - -list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) -include(AddLLVM) - -include_directories(${LLVM_INCLUDE_DIRS}) -link_directories(${LLVM_LIBRARY_DIRS}) -add_definitions(${LLVM_DEFINITIONS}) - -file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) -file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) -add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) - -set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) -set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) - -# Link against LLVM and CLANG libraries -target_link_libraries(hipify-clang PRIVATE - clangASTMatchers - clangFrontend - clangTooling - clangParse - clangSerialization - clangSema - clangEdit - clangFormat - clangLex - clangAnalysis - clangDriver - clangAST - clangToolingCore - clangRewrite - clangBasic - LLVMProfileData - LLVMSupport - LLVMMCParser - LLVMMC - LLVMBitReader - LLVMOption - LLVMCore) - -if 
(LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1") - target_link_libraries(hipify-clang PRIVATE clangToolingInclusions) -endif() - -if (LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1") - target_link_libraries(hipify-clang PRIVATE LLVMFrontendOpenMP) -endif() - -if (MSVC) - target_link_libraries(hipify-clang PRIVATE version) - target_compile_options(hipify-clang PRIVATE "/Od /GR- /EHs- /EHc-") - set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS") - set(StdCpp "/std:c++") -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fno-rtti -fvisibility-inlines-hidden") - set(StdCpp "-std=c++") -endif() - -if (LLVM_PACKAGE_VERSION VERSION_GREATER "9.0") - string(APPEND StdCpp "14") -# MSVC starting from 1900 (VS 2015) supports only the following c++ std values: c++14|c++17|c++latest -elseif (MSVC) - set(StdCpp "") -else() - string(APPEND StdCpp "11") -endif() - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} ${StdCpp} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}\\\"") - -set(INSTALL_PATH_DOC_STRING "Installation path for hipify-clang") -set(HIPIFY_INSTALL_PATH ${CMAKE_INSTALL_PREFIX}) -if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - if(CMAKE_BUILD_TYPE MATCHES Debug) - set(HIPIFY_INSTALL_PATH "${CMAKE_CURRENT_SOURCE_DIR}/bin" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - elseif(CMAKE_BUILD_TYPE MATCHES Release) - if (BIN_INSTALL_DIR) - set(HIPIFY_INSTALL_PATH "${BIN_INSTALL_DIR}" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - else() - set(HIPIFY_INSTALL_PATH "${PROJECT_BINARY_DIR}/bin" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - endif() - else() - message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. 
Valid values are Debug and Release") - endif() -elseif(BIN_INSTALL_DIR) - set(HIPIFY_INSTALL_PATH "${BIN_INSTALL_DIR}" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) -endif() - -install(TARGETS hipify-clang DESTINATION ${HIPIFY_INSTALL_PATH}) - -install( - DIRECTORY ${LLVM_DIR}/../../clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}/ - DESTINATION ${HIPIFY_INSTALL_PATH} - COMPONENT clang-resource-headers - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.modulemap" - PATTERN "algorithm" - PATTERN "complex" - PATTERN "new" - PATTERN "ppc_wrappers" EXCLUDE - PATTERN "openmp_wrappers" EXCLUDE) - -if (UNIX) - set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hipify-clang) - configure_file(packaging/hipify-clang.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) - add_custom_target(pkg_hipify-clang COMMAND ${CMAKE_COMMAND} . - COMMAND rm -rf *.deb *.rpm *.tar.gz - COMMAND make package - COMMAND cp *.deb ${PROJECT_BINARY_DIR} - COMMAND cp *.rpm ${PROJECT_BINARY_DIR} - COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} - WORKING_DIRECTORY ${BUILD_DIR}) - - file(GENERATE OUTPUT ${PROJECT_BINARY_DIR}/fixnames - CONTENT "pwd; for i in *.deb; do mv \"\$i\" \"\${i/.deb/-amd64.deb}\" ; done - for i in *.rpm ; do mv \$i \${i/.rpm/.x86_64.rpm} ; done") - - add_custom_target(package_hipify-clang - COMMAND bash ${PROJECT_BINARY_DIR}/fixnames - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - DEPENDS pkg_hipify-clang) -endif() - -if (HIPIFY_CLANG_TESTS) - find_package(PythonInterp 2.7 REQUIRED) - - function (require_program PROGRAM_NAME) - find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) - if (FOUND_${PROGRAM_NAME}) - message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}") - else() - message(SEND_ERROR "Can't find ${PROGRAM_NAME}. 
Either set HIPIFY_CLANG_TESTS to OFF to disable hipify tests, or install the missing program.") - endif() - endfunction() - - require_program(lit) - require_program(FileCheck) - - find_package(CUDA REQUIRED) - if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR - (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR - (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR - (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR - (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR - (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR - (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0")) - message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.") - if (CUDA_VERSION_MAJOR VERSION_LESS "7") - message(STATUS "Please install CUDA 7.0 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "8") - message(STATUS "Please install LLVM + clang 3.8 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "9") - message(STATUS "Please install LLVM + clang 4.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "9.0") - message(STATUS "Please install LLVM + clang 6.0 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "10") - message(STATUS "Please install LLVM + clang 7.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.0") - message(STATUS "Please install LLVM + clang 8.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.1") - message(STATUS "Please install LLVM + clang 9.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - message(STATUS "Please install LLVM + clang 10.0 or higher.") - endif() - endif() - - configure_file( - ${CMAKE_CURRENT_LIST_DIR}/../tests/hipify-clang/lit.site.cfg.in - ${CMAKE_CURRENT_BINARY_DIR}/tests/hipify-clang/lit.site.cfg - @ONLY) - - add_lit_testsuite(test-hipify "Running HIPify 
regression tests" - ${CMAKE_CURRENT_LIST_DIR}/../tests/hipify-clang - PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/hipify-clang/lit.site.cfg - ARGS -v - DEPENDS hipify-clang) - - add_custom_target(test-hipify-clang) - add_dependencies(test-hipify-clang test-hipify) - set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests") -endif() diff --git a/hipify-clang/README.md b/hipify-clang/README.md deleted file mode 100644 index 8cdeceace0..0000000000 --- a/hipify-clang/README.md +++ /dev/null @@ -1,507 +0,0 @@ -# HIPIFY -### Tools to translate CUDA source code into portable HIP C++ automatically -## Table of Contents - - - -- [hipify-clang](#clang) - * [Dependencies](#dependencies) - * [Usage](#hipify-clang-usage) - * [Building](#building) - * [Testing](#testing) - * [Linux](#linux) - * [Windows](#windows) -- [hipify-perl](#perl) - * [Usage](#hipify-perl-usage) - * [Building](#hipify-perl-building) -- [Supported CUDA APIs](#cuda-apis) -- [Disclaimer](#disclaimer) - - - -## hipify-clang - -`hipify-clang` is a clang-based tool for translation CUDA sources into HIP sources. -It translates CUDA source into an abstract syntax tree, which is being traversed by transformation matchers. -After applying all the matchers, the output HIP source is produced. - -**Advantages:** - -1. It is a translator; thus, any even very complicated constructs will be parsed successfully, or an error will be reported. -2. It supports clang options like [`-I`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-i-dir), [`-D`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-d-macro), [`--cuda-path`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-cuda-path), etc. -3. Seamless support of new CUDA versions as it is clang's responsibility. -4. Ease in support. - -**Disadvantages:** - -1. 
The main advantage is also the main disadvantage: the input CUDA code should be correct; incorrect code wouldn't be translated to HIP. -2. CUDA should be installed and provided in case of multiple installations by `--cuda-path` option. -3. All the includes and defines should be provided to transform code successfully. - -### hipify-clang: dependencies - -`hipify-clang` requires: - -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**10.0.0**](http://releases.llvm.org/download.html#10.0.0). - -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.2**](https://developer.nvidia.com/cuda-downloads). - -| **LLVM release version** | **CUDA latest supported version** | **Windows** | **Linux** | -|:----------------------------------------------------------:|:------------------------------------------------------------------------:|:-----------:|:---------:| -| [3.8.0](http://releases.llvm.org/download.html#3.8.0) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.8.1](http://releases.llvm.org/download.html#3.8.1) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.9.0](http://releases.llvm.org/download.html#3.9.0) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.9.1](http://releases.llvm.org/download.html#3.9.1) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [4.0.0](http://releases.llvm.org/download.html#4.0.0) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [4.0.1](http://releases.llvm.org/download.html#4.0.1) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.0](http://releases.llvm.org/download.html#5.0.0) | 
[8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.1](http://releases.llvm.org/download.html#5.0.1) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.2](http://releases.llvm.org/download.html#5.0.2) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [6.0.0](http://releases.llvm.org/download.html#6.0.0) | [9.0](https://developer.nvidia.com/cuda-90-download-archive) | + | + | -| [6.0.1](http://releases.llvm.org/download.html#6.0.1) | [9.0](https://developer.nvidia.com/cuda-90-download-archive) | + | + | -| [7.0.0](http://releases.llvm.org/download.html#7.0.0) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
clang bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.0.0_bug_38811.zip)*
| -
not working due to
clang bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [7.0.1](http://releases.llvm.org/download.html#7.0.1) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
clang bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.0.1_bug_38811.zip)*
| -
not working due to
clang bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [7.1.0](http://releases.llvm.org/download.html#7.1.0) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
clang bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.1.0_bug_38811.zip)*
| -
not working due to
clang bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [8.0.0](http://releases.llvm.org/download.html#8.0.0) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
clang bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.0_bug_38811.zip)*
| + | -| [8.0.1](http://releases.llvm.org/download.html#8.0.1) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
clang bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.1_bug_38811.zip)*
| + | -| [9.0.0](http://releases.llvm.org/download.html#9.0.0) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | -| [9.0.1](http://releases.llvm.org/download.html#9.0.1) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | -| [**10.0.0**](http://releases.llvm.org/download.html#10.0.0)| [**10.2**](https://developer.nvidia.com/cuda-downloads) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | - -`*` Download the patch and unpack it into your `LLVM` distributive directory; a few header files will be overwritten; rebuilding of `LLVM` is not needed. - -In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. - -Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\10.0.0\dist` - -### hipify-clang: usage - -To process a file, `hipify-clang` needs access to the same headers that would be required to compile it with clang. - -For example: - -```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.2 -I /usr/local/cuda-10.2/samples/common/inc -``` - -`hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you -were compiling the input file. For example: - -```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.2 -- -std=c++17 -``` - -The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. - -For a list of `hipify-clang` options, run `hipify-clang --help`. - -### hipify-clang: building - -Assuming this repository is at `./HIP`: - -```bash -cd hipify-clang -mkdir build dist -cd build - -cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_BUILD_TYPE=Release \ - .. - -make -j install -``` -On Windows, the following option should be specified for `cmake` at first place: `-G "Visual Studio 16 2019 Win64"`; the generated `hipify-clang.sln` should be built by `Visual Studio 16 2019` instead of `make.` -Please, see [hipify-clang: Windows](#windows) for the supported tools for building. - -Debug build type `-DCMAKE_BUILD_TYPE=Debug` is also supported and tested; `LLVM+CLANG` should be built in `Debug` mode as well. 
-64-bit build mode (`-Thost=x64` on Windows) is also supported; `LLVM+CLANG` should be built in 64-bit mode as well. - -The binary can then be found at `./dist/bin/hipify-clang`. - -### hipify-clang: testing - -`hipify-clang` has unit tests using `LLVM` [`lit`](https://llvm.org/docs/CommandGuide/lit.html)/[`FileCheck`](https://llvm.org/docs/CommandGuide/FileCheck.html). - -`LLVM+CLANG` should be built from sources, pre-built binaries are not exhaustive for testing. - -**LLVM 9.0.1 or older:** - -1. download [`LLVM`](http://releases.llvm.org/9.0.1/llvm-9.0.1.src.tar.xz)+[`CLANG`](http://releases.llvm.org/9.0.1/cfe-9.0.1.src.tar.xz) sources; -2. build [`LLVM+CLANG`](http://releases.llvm.org/9.0.0/docs/CMake.html): - - **Linux**: - ```bash - cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm \ - -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \ - -DCMAKE_BUILD_TYPE=Release \ - ../llvm - make -j install - ``` - **Windows**: - ```shell - cmake \ - -G "Visual Studio 16 2019" \ - -A x64 \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm \ - -DLLVM_TARGETS_TO_BUILD="NVPTX" \ - -DCMAKE_BUILD_TYPE=Release \ - -Thost=x64 \ - ../llvm - ``` -Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build project `INSTALL`. - -**LLVM 10.0.0 or newer:** - -1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.0/llvm-project-10.0.0.tar.xz) sources; -2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): - - **Linux**: - ```bash - cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm-project \ - -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \ - -DLLVM_ENABLE_PROJECTS="clang" \ - -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON - -DCMAKE_BUILD_TYPE=Release \ - ../llvm-project/llvm - make -j install - ``` - **Windows**: - ```shell - cmake \ - -G "Visual Studio 16 2019" \ - -A x64 \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm-project \ - -DLLVM_TARGETS_TO_BUILD="NVPTX" \ - -DLLVM_ENABLE_PROJECTS="clang" \ - -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON - -DCMAKE_BUILD_TYPE=Release \ - -Thost=x64 \ - ../llvm-project/llvm - ``` -Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build project `INSTALL`. - -3. Ensure [`CUDA`](https://developer.nvidia.com/cuda-toolkit-archive) of minimum version 7.0 is installed. - - * Having multiple CUDA installations to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: - - - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2` - - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2"` - - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2"` - -4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. - - * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - - - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32` - - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32` - -5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. - - * Path to CUB should be specified by the `CUDA_CUB_ROOT_DIR` option: - - - ***Linux***: `-DCUDA_CUB_ROOT_DIR=/srv/git/CUB` - - - ***Windows***: `-DCUDA_CUB_ROOT_DIR=d:/GIT/cub` - -5. 
Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. - -6. Ensure `lit` and `FileCheck` are installed - these are distributed with `LLVM`. - - * Install `lit` into `python`: - - - ***Linux***: `python /srv/git/LLVM/10.0.0/llvm/utils/lit/setup.py install` - - - ***Windows***: `python d:/LLVM/10.0.0/llvm/utils/lit/setup.py install` - - * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit` - - - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py` - - * `FileCheck`: - - - ***Linux***: copy from `/srv/git/LLVM/10.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - - ***Windows***: copy from `d:/LLVM/10.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - - - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option - -7. Set `HIPIFY_CLANG_TESTS` option turned on: `-DHIPIFY_CLANG_TESTS=1`. - -8. Build and run tests: - -### hipify-clang: Linux - -On Linux the following configurations are tested: - -Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.5.32 - -Ubuntu 16-18: LLVM 8.0.0 - 10.0.0, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 - -Minimum build system requirements for the above configurations: - -Python 2.7, cmake 3.5.1, GNU C/C++ 5.4.0. - -Here is an example of building `hipify-clang` with testing support on `Ubuntu 16.04`: - -```bash -cmake - -DHIPIFY_CLANG_TESTS=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/srv/git/LLVM/10.0.0/dist \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2 \ - -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=/srv/git/CUB \ - -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit \ - .. 
-``` -*A corresponding successful output:* -```shell --- The C compiler identification is GNU 7.4.0 --- The CXX compiler identification is GNU 7.4.0 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Detecting C compile features --- Detecting C compile features - done --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Detecting CXX compile features --- Detecting CXX compile features - done --- Found LLVM 10.0.0: --- - CMake module path: /srv/git/LLVM/10.0.0/dist/lib/cmake/llvm --- - Include path : /srv/git/LLVM/10.0.0/dist/include --- - Binary path : /srv/git/LLVM/10.0.0/dist/bin --- Linker detection: GNU ld --- Found PythonInterp: /usr/bin/python2.7 (found suitable version "2.7.12", minimum required is "2.7") --- Found lit: /usr/local/bin/lit --- Found FileCheck: /srv/git/LLVM/10.0.0/dist/bin/FileCheck --- Looking for pthread.h --- Looking for pthread.h - found --- Looking for pthread_create --- Looking for pthread_create - not found --- Looking for pthread_create in pthreads --- Looking for pthread_create in pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Found CUDA: /usr/local/cuda-10.2 (found version "10.2") --- Configuring done --- Generating done --- Build files have been written to: /srv/git/HIP/hipify-clang/build -``` -```shell -make test-hipify -``` -*A corresponding successful output:* -```shell -Running HIPify regression tests -======================================== -CUDA 10.2 - will be used for testing -LLVM 10.0.0 - will be used for testing -x86_64 - Platform architecture -Linux 5.2.0 - Platform OS -64 - hipify-clang binary bitness -64 - python 2.7.12 binary bitness 
-======================================== --- Testing: 69 tests, 12 threads -- -PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 69) -PASS: hipify :: unit_tests/device/math_functions.cu (2 of 69) -PASS: hipify :: unit_tests/device/atomics.cu (3 of 69) -PASS: hipify :: unit_tests/headers/headers_test_01.cu (4 of 69) -PASS: hipify :: unit_tests/device/device_symbols.cu (5 of 69) -PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 69) -PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 69) -PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 69) -PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 69) -PASS: hipify :: unit_tests/headers/headers_test_07.cu (10 of 69) -PASS: hipify :: unit_tests/headers/headers_test_06.cu (11 of 69) -PASS: hipify :: unit_tests/headers/headers_test_11.cu (12 of 69) -PASS: hipify :: unit_tests/headers/headers_test_10.cu (13 of 69) -PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 69) -PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 69) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (16 of 69) -PASS: hipify :: unit_tests/headers/headers_test_09.cu (17 of 69) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 69) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 69) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 69) -PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 69) -PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 69) -PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 69) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (24 of 69) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (25 of 69) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 69) -PASS: hipify :: 
unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 69) -PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 69) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (29 of 69) -PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (30 of 69) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 69) -PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 69) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (33 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (34 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 69) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (40 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (41 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (44 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (45 of 69) -PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (46 of 69) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 69) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 69) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 69) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu (50 of 69) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu (51 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (52 of 69) -PASS: hipify :: 
unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (53 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (54 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (55 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (56 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (57 of 69) -PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (59 of 69) -PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (60 of 69) -PASS: hipify :: unit_tests/samples/allocators.cu (61 of 69) -PASS: hipify :: unit_tests/samples/coalescing.cu (62 of 69) -PASS: hipify :: unit_tests/samples/axpy.cu (63 of 69) -PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (64 of 69) -PASS: hipify :: unit_tests/samples/cudaRegister.cu (65 of 69) -PASS: hipify :: unit_tests/samples/intro.cu (66 of 69) -PASS: hipify :: unit_tests/samples/square.cu (67 of 69) -PASS: hipify :: unit_tests/samples/static_shared_memory.cu (68 of 69) -PASS: hipify :: unit_tests/samples/vec_add.cu (69 of 69) -Testing Time: 3.23s - Expected Passes : 69 -[100%] Built target test-hipify -``` -### hipify-clang: Windows - -*Tested configurations:* - -| **LLVM** | **CUDA** | **cuDNN** | **Visual Studio** | **cmake** | **Python** | -|:--------------:|---------:|--------------------:|--------------------------:|----------:|-----------:| -| 5.0.0 - 5.0.2 | 8.0 | 5.1.10 - 7.1.4.18 | 2017.15.5.2 | 3.5.1 | 3.6.4 | -| 6.0.0 - 6.0.1 | 9.0 | 7.0.5.15 - 7.6.5.32 | 2017.15.5.5 | 3.6.0 | 3.7.2 | -| 7.0.0 - 7.1.0 | 9.2 | 7.6.5.32 | 2017.15.9.11 | 3.13.3 | 3.7.3 | -| 8.0.0 - 8.0.1 | 10.0 | 7.6.5.32 | 2017.15.9.15 | 3.14.2 | 3.7.4 | -| 9.0.0 - 9.0.1 | 10.1 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.0 | -| 10.0.0 | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | -| 11.0.0git | 10.2 | 
7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | - -*Building with testing support on `Windows 10` by `Visual Studio 16 2019`:* - -```shell -cmake - -G "Visual Studio 16 2019" \ - -A x64 \ - -DHIPIFY_CLANG_TESTS=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=d:/LLVM/10.0.0/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2" \ - -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py \ - -Thost=x64 - .. -``` -*A corresponding successful output:* -```shell --- Found LLVM 10.0.0: --- - CMake module path: d:/LLVM/10.0.0/dist/lib/cmake/llvm --- - Include path : d:/LLVM/10.0.0/dist/include --- - Binary path : d:/LLVM/10.0.0/dist/bin --- Found PythonInterp: c:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") --- Found lit: c:/Program Files/Python38/Scripts/lit.exe --- Found FileCheck: d:/LLVM/10.0.0/dist/bin/FileCheck.exe --- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2 (found version "10.2") --- Configuring done --- Generating done --- Build files have been written to: d:/HIP/hipify-clang/build -``` - -Run `Visual Studio 16 2019`, open the generated `hipify-clang.sln`, build project `test-hipify`. - -## hipify-perl - -`hipify-perl` is autogenerated perl-based script which heavily uses regular expressions. - -**Advantages:** - -1. Ease in use. - -2. It doesn't check the input source CUDA code for correctness. - -3. It doesn't have dependencies on 3rd party tools, including CUDA. - -**Disadvantages:** - -1. 
Current disability (and difficulty in implementing) of transforming the following constructs: - - * macros expansion; - - * namespaces: - - - redefines of CUDA entities in user namespaces; - - - using directive; - - * templates (some cases); - - * device/host function calls distinguishing; - - * header files correct injection; - - * complicated argument lists parsing. - -2. Difficulties in supporting. - -### hipify-perl: usage - -```shell -perl hipify-perl square.cu > square.cu.hip -``` - -### hipify-perl: building - -To generate `hipify-perl`, run `hipify-clang --perl`. Output directory for the generated `hipify-perl` file might be specified by `--o-hipify-perl-dir` option. - -## Supported CUDA APIs - -- [Runtime API](../docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md) -- [Driver API](../docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md) -- [cuComplex API](../docs/markdown/cuComplex_API_supported_by_HIP.md) -- [cuBLAS](../docs/markdown/CUBLAS_API_supported_by_HIP.md) -- [cuRAND](../docs/markdown/CURAND_API_supported_by_HIP.md) -- [cuDNN](../docs/markdown/CUDNN_API_supported_by_HIP.md) -- [cuFFT](../docs/markdown/CUFFT_API_supported_by_HIP.md) -- [cuSPARSE](../docs/markdown/CUSPARSE_API_supported_by_HIP.md) - -## Disclaimer - -The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. 
No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale. - -AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. - -Copyright (c) 2014-2020 Advanced Micro Devices, Inc. All rights reserved. diff --git a/hipify-clang/packaging/hipify-clang.txt b/hipify-clang/packaging/hipify-clang.txt deleted file mode 100644 index b189eff1e6..0000000000 --- a/hipify-clang/packaging/hipify-clang.txt +++ /dev/null @@ -1,58 +0,0 @@ -cmake_minimum_required(VERSION 2.8.3) -project(hipify-clang) - -install(PROGRAMS @HIPIFY_INSTALL_PATH@/hipify-clang DESTINATION bin) -install(DIRECTORY @HIPIFY_INSTALL_PATH@/include DESTINATION bin) - -# Check if .hipversion exists(only exists when hipify-clang is built with HIP) -if(EXISTS "@HIPIFY_INSTALL_PATH@/.hipVersion") - set(HIP_BUILD "TRUE") -else() - set(HIP_BUILD "FALSE") -endif() - -if(${HIP_BUILD}) - file(STRINGS @HIPIFY_INSTALL_PATH@/.hipVersion HipVersion) - foreach(NameAndValue ${HipVersion}) - # Get variable name - string(REGEX MATCH "^[^=]+" Name ${NameAndValue}) - # Get the value - string(REPLACE "${Name}=" "" Value ${NameAndValue}) - # Assign the vale to the variable - set(${Name} "${Value}") - endforeach() -endif() - -############################# -# Packaging steps -############################# -set(CPACK_SET_DESTDIR TRUE) -set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") -set(CPACK_PACKAGE_NAME "hipify-clang") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "hipify-clang: a clang-based tool to translate CUDA source code into portable HIP C++ automatically") -set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") 
-set(CPACK_PACKAGE_CONTACT "Mankov Evgeny ") - -if(${HIP_BUILD}) - set(CPACK_PACKAGE_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}) - set(CPACK_PACKAGE_VERSION_MAJOR ${HIP_VERSION_MAJOR}) - set(CPACK_PACKAGE_VERSION_MINOR ${HIP_VERSION_MINOR}) - set(CPACK_PACKAGE_VERSION_PATCH ${HIP_VERSION_PATCH}) -else() - set(CPACK_PACKAGE_VERSION @LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@.@LLVM_VERSION_PATCH@) - set(CPACK_PACKAGE_VERSION_MAJOR @LLVM_VERSION_MAJOR@) - set(CPACK_PACKAGE_VERSION_MINOR @LLVM_VERSION_MINOR@) - set(CPACK_PACKAGE_VERSION_PATCH @LLVM_VERSION_PATCH@) -endif() - -set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) -set(CPACK_GENERATOR "TGZ;DEB;RPM") -set(CPACK_BINARY_DEB "ON") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda (>= 7.0)") -set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") -set(CPACK_RPM_PACKAGE_AUTOREQPROV "NO") -set(CPACK_RPM_PACKAGE_REQUIRES "cuda >= 7.0") -set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") -set(CPACK_SOURCE_GENERATOR "TGZ") -include(CPack) diff --git a/hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip b/hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip deleted file mode 100644 index 28205a2645b83be0a278f2132b603ceb697b5d53..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14641 zcma)j19ariwrwK&U?4IcUA9S?{Dww zTD@u&_9rU=0*V3v1^@saIVsX#&3_fpuTPd{`gF4Y(E|D(76z7j)+YZCJB0sX$3{y} zOaI@jaq|9WT#SEXZDws?>11g1UnodGX>N-2TGCYpGXVgAg8>9!`+uNxbpF;u9Rnvr zJsm?MXEOsM9b+eJ14lC(YX@4>aV-yr*`c~mJ|)i(j=W@hcIh3^@HQBjruIM7RoF;W zP&FD(r==(obzpUS^TZ@;^@sJF$J0kf7VdaRaGAYG9<4s_soB{o5*(gyH~XVrFCnT6 z)HS^=ETcH9_G{Qa2kP2r6Y_k}2?1cssY8V0A}UIT{)g zn1W46wX`8U9h?%Wvz}k4DQlyJDAtTFWrnuK48NaE$UlLzw^KmoSL^OlL*wQ+JveMc zsQg~Cm3W=Dr(Bk77$w$j+p4=+=TwzNhZCu}KBmq{Ri zC)v0{+j+Av&QOW;eBqX9h>WKryhR}-ka(dM038b``;OL 
zWP4_2OIEPqPo#dNENG0bxQPzGO)2Gq$|gfp#{8D)2I`8}5iXdX5MYZoQ`PyC>iY)b zHd%5^>v%IQG6*7t05?kVP4_d!C!XTl1suh9T-S|>iETn9Ux^0a&EhnsT#!iWb2Xg( zih9$QLIWB*1q>SJky|KK@uJ?XXROulqOgF+q_SE~FxtWa?tQy+xFPbCK-zh7KS$Pp z0Sf0*ilKhCxJ!hTgH)88pJ@I&0F?=F%O?#qR1SxC`rN7l=zuC+hgfo(G6{Jz0T$BN zLy@^9CCkYa*73069(`w6=kMh%kLF%-t^TK`sP*v;v>DH|SJk)9b>Rirn?N~BP~*$f zDlbu7w`%dg^5V5*K4`TT>+YOj;~Z@iH(zjk)Uw@hur-u{CB?LC=F) z)6#`d)MFmCXu`XX%v$ey(|OL$Zj2lrQV{P?ckwG%NqP*KYlb0HV)5gD0)s*Nmmj-* z-;1PPv#9DX|ID?^;9`k1y;5dLC1CcUD{S(JSqA%LYsQ=e^~3mUmBhz<_Gb>Ap79ewZJr2|4O7t`e8=|_Is%Vv%;$hE*)7< zm0u7f=$%}o(A(4sWDmE_PpjFg#BU;)cgsty2t;;RI{9wY7cQb3_pI(x=d9{-*U^J4@`PjIsmQtZ`CtSgLx|KTPUse=!EfVrBzi49X%%DJ5n(qiS&N+DJ`*(vBu+3pa?M&1x01U3T{DF*_6AZ!847D2)T z^Si?QNZR3oRP^TxP4wp?Pq<`bcUwR^I5#)G7u}Q_P(G=sztwaTMLDk;DT}byYC55JCeRY0X)LF47EcO;nJP+Xi({_5 z%h(4m2*NhV_N&!|rJLo2E9fHMXP}jFf4aSqNFplPTQ3MIJ(TZFRfLtDsSJmyAuBCc z$M4p0mR&Yy+q+C-=I#o@GoGUFrs=7~KB>gFcs`^e6&$>Pjj0%02ym?T583a9JmA(+4Q>~`*>xN#^ockAvHmR?k&S!@YNiQBQF|f znpJ~ex|mqx_o;I1Eo_QHRdRBp$#d0dcyj)H%y~SOwU*m?grbsL{D*nmVyCB468Uix zn%g;yhLRiSyUCi9dWZ3vp1POOnv^<^;Tp%udpsCc8`p4@7qXI@3wcYGO+MA-1&PbO zl;Zw+){aoeV9aShQ}z>gSC}7-XMf4F)Hlo8@4TZn`noWh*J9p5ayZq*6hly+VG5ZK z5qhz&zAPw`m`+;sheR$w78I!-aT&hQqit-6nNnFzFl*bHuP*6{y0m49Tthj9e=u-;WU; zlxkYpG9DJ!iBv0#ANiG&`@%14R8dBVu0oz@Lg&0(OdiKIBWcZ3ls0s*!Gf$k>9Epc z*2Cn;GKmI`)YjG-aCT9Z3`5sMH!lt9?CL+dPli~h0oL7C3#-d zd_bM^9Oz1PdiRQxWbU_6D0p=UV}wM3iDim)RW5ncDg6{ZmL6G<-}k`vu*|antBz(% zkoRJs6?Z)~N7AD2$nfgl6vzdIZG;++C4unLi>T_P_@J+*B$eWVTt5=T(zcP+xLBMjawcuKyDr%cr z%3|WAoQorxi^DllMLbc3yB9<}83soY2~}pA=JjiUQfZi?DhE1X9>P!;(rGwJ$MhnO z-Qp3tJYUjorED*yl8~>fvC-5|pwqi!F}&we+fy?;n>72b5Z7TI ze1P{+hdGLMR+W4K+unNOGn*q!=RACS*!DqNu)gXjTHy$RQIO(m@t(1i;frlkXhwMk z;4R9J?kkpLRTt2TgA@RoYk5$xEK!Xg`av%R9gg_n6b=h6|&AY^c3s3EcwjPcpq7K1L4F#lE#057B z|1dl+9|L=bvh~?WTKwFZ{91b)^?J?rAo)0YRHs#<%%26G^=6ffPKUUGc!~ufz?JaN zRz#LF!U_d=*tyd1g6A((W~xs&Xt!fUs#ZF7L1-(@}mw5$kr=Iw@>rxgJxv?(HiqNHLxfj=U zSb4viN-6T-+7(oEFet2qkAwt$IH5rbGUCVU;Jir7*aj`hHWh>Q 
zU=dKuDh6*?9&fy5vR^F+d5!Ti_?7rzmeSbh(^$2B}CUh$H;06z%TizSCR(q z*J;Ty0xW;+bN|W<6J;}jYqKD&@X0GnaB_)i-NT^j5*@5b7s=c1-j2Pyx3G2xNmryW z@y(Ik?W@l5;vjKfGWIJcNAd^JS!UV(bs*aB@g*ApkFwB%r|iZ2T=9~HB~-4)J$mde z7H26xH8vO~E0}s~G-Gu%vU9`*|11=_V&7#flgd)JKdHz~gpkj+iEX2kbDdk@Y_uGQqS4)N(l{{AqWor*KU8 zQ|!=8qw8}=*b2u zsV5A(NP)e}K+)VZ+=MSvemgT*jwDQai5dKul!ASu zj1$?S97?mZXlVfVCM-@(G}1DE4IdE-qBlDypmg}p8^BWpGdjij#)!TZ7dJ}y%H`=6 zZ`nskxG9?r>yJ*ZiO<3jCQA{YyD}r-8S;D5F953tp0_AkX9{a8PHWoRi^X&u{O_9? z@af=|YpXYZZv*ntKh|;ycAl}^l%MfA!o4}s!F;&zKJGXADd-_C1UeUKM z&v45!1vi@==t+pRng-L(n)%nWud}N46-RAtcqC9#$4&ajZ7$H}64ec|GuFe|i9VP8 ziFh9Scv6Tt+sBa+0e#Xm@nzX9Y8z}CB@R$a8ZM2SZFs;f~~_d zjuJ*%O&*lI_`%APoCc}}kE~1_V>NEn0jgGvZ^)@)XP!pA9zGaZg}}z0oNmaN1tZiT z*+`Rek7)RdpKP`TRcX{v<3^P(jhbcHkkh5jJk3&r_LOw!jwwx)nxMAP~mNaFfIC-+w^ig*x#sJBM*L3P7 z@e8)^rz=;f!|9UcLlQIzR-;B-o|(>K+*A4)1zoXrv%RcNRC1=p0#Y#_Y~j zKJS}RhdR;O`P*Iraqd!OO$E-ay?^Rb_TYF zf|iF&EJ>}u1s&^_;16Y2Qc??7~b-TkiK9bn>lKF@}G1ou1v7)wY2DpKkV=%^D z+=x)8i9U5Q(B63}TRC<-W4<*vuc3NemC<5|&$DsR`Td-dk$VeTvP%qtgKa;xgv)Ce zx3R)P{W|gTMUo}Ys70E>=DNZ)cl{Xvxd%GEm_G=^PO_a&A*T8+4r*>$2Tbjn2CJ~9+Fg|r8HP(jzt6Vm#; z3s^6i4q_(NTZM`mCPr)9q5qi4T8IzTYn56S-xI@}do0^Nky`cxJ z)Kg$4c9A$HV5Vy{-)`DKRc_=yX{A(-WeYz1B=Yo=&=KvUlYI`u*Wbxgqt0s7?@?o- z#cs%ID43C3y>%A!HZN!tV4CpKG{9HxGI?l)E%SbvF^8L)8^cWQ`q6+pq0rsi-kWZB zJ2jzqzBA!x9OnAl!FbUwJk(_T=kZiHVIUpj4d4y$dLzH z0Sm}G$Q#E15bUg#8$?JF& zy3woOUi-T}D!f!*%}yR}oDaI{qIG<;-+l}|50<08*NI`OwiGAJdo;>Dm;CIds9YDZ zBp=%YLS-)Me%AIFQ!VLKI7TenZr%HpFcBVwEG&heAe6A*sonSK9kj)5<@`TZ9{wdsyE~ZL}G^cs3Xn;HgzPWD;8&CEKW%pDJbJC zEJz=q^EvJEur_s@Xw^zN-cJUdZQ(_#e|IUt>Jub^GLXuB)Bl=7c8X%CPZqD>3!Jdc3GQKCnHns?HuA`bHy4o)++NAr(HN`s_+ZsZt=)b zxBaxd5yq?I#7J$>EhE*48Gl_lUsv2kDS$LBxwC$n(y%W7DlD5O!{zAW2tY+Jq1*scgWOB5akK6UFcV}odblHo4? 
zS<_}U?uAXw{IQhb#)YH+#t-w;sRBjO8tu7tT7w8B^)kI{Q1L-S%mHuuSOz3RT4iOa z^Qo^a8h2K@kCTu*76YDHRm2t;6B-46r?csMyeW%*J04nKnI4o9+lDK0R>Pci=@=FSn;jA^m^8zt6djx51yRe2kv zT`=<6!X1+2q|CmHnFj$8;%OBe*L=ETMP#oi{`e!xeH}HDG5t5@dQv5O;ULRQg>d;~ zNq=?5Xi|uh)2UOFl1lg8kdn$+d!JnxKn~7s9;cGEmwqL!O zc;zxDwbvDRr;?|3GPmv(9JPa9V`sH5bJ_`I96tTw$f;+663sAsWn$8eg@@{PH|V_h zHDA+)>A>FMx&HI;7+jS=Qd+#ij)ReOA)^LbV*GP_9GDbXMo7h#BJ=wX?YLviHT}=H z6yHVKJe@eiXPcXc-b>`u-KRuu^4I=JI`g-#gSS~GpwA;+(2Bbp^=H?0N`zdG9>H-~ zv*zLqLYtH1_Gza6QK@TI$!lAUaBcpG7O;l3vnAg~mQnIkuG_*7Ap5<52D$MTw=oiK zA>O6uSHB;!uh~4m-rEUL3MJXH{+p%^*D4VnFVQLeNg-E z$pMBF08asc_(OnU3{YWR+oYoXW7^0Cufp}ZNuEGvkSeCt2JHeModN9T0qx`Fxcp>> zcJ%V6hB^U5qRq;W&)yjU+qruxyA1<^#JiK_p(r?m)MmaffbKbKTE$tJ(t~{lyiVscx zcI)}<^UtIRNWjWhB7)_Pbwf1_08pR?0C4d*h zTqBy-#P(S1j)CBQ@9Ja@Uzf*VvIL!D{`}0SmkB2H;$ckBu0)Q2UC->J#Y6OPL?W17 z4;%n(FD@mFU3|ICCD(CaqX)iicUZWPGLb2+jzy-q&7iz5jPU|qHP&`w$Jfn*TVTSo35F~yqro*$hU@We< z=0_m65&Cj^{jJ+D`={3Yt^;|ZKau{;>3h?enBgh=Bt~;O=}$LiKrH$}=WoMLCkod5 z3c(rVT6L3Ob0EjuU@C=wKpMlm5QjcnZfr8z@WH zC)p7Ybp!3+O$&o1pjsK()-!7UqIOds%xLBNsfM~I28!w+bZY>aQ}*cRkwzUCNdVXT zB=&ODA4jLC@KI^m59WEj%Z8RAQ;ohE!W6OW;LMicT`#f_ngxGo>&2_PdEP&M z=luu3fGv&2w{({dOU}2{bvA6;UE#AiUh$kMWg6A)gZVNvG_k1Bv!-}whDWAodffm%i)_C^3UlNm)@`o??evW^Yf znK37xn(R|Q@N2NnfU+F6s|yR>N)rex)&^XcdN>Oh;eMeYnsMDFTQYT01KjqZxtBT);-|VN<6O03rqbh{+bZ$y8f_O8tIuQByZUMopkaK&dHz(fAn$hQU#HSfvMl>R5 zC<0xo8`LQ#HUJ8u=$`EB!$@*Ot}afKPYx~(+R<8jC|n+ppd-8E=3^nd?YECds-Bqb z#zWGfh}j4Meh`en+5}v?KmDD#0=7{~n!6+lht3X63=}FGNL1bLCs0w-zTJ=3_J#53@~=)`5<&Lv=t>9(%T~G_UXIO@e(&% zUg5Jh_$Q?pBYluU!czx@LXkiFDs2Y6yu@Z&%ro!`3Q z@Q_e|~iW*0pEoSbgUKitxwjg+EE5wTxU z)EMKWxisyC%jbo|74)L#41Ed32|eE)8@GzmBW`D@L8QfA@_G+`|0AC~+@&72BAe=} zp^z(G`%;ssf6vd}f1*{TH=7ey3)AEqhlZ|OI1Gki;c)Sgj)q!2_`B4Vr&y@JK<1Sj zihGQi)UMPi7c~3`u^dYF_ZgG&9p?8il)jPecj318_IHp5a@ijhd!+K$NZ&)}dPl5e zJy5^|#lRWef@BlfD|KO2&YW7C8p#Vw;VCZ#-1FjR#+oaw^~ZUtz@i4gYl0AY2MgiS z=jgQ&BY}T+Ml|_OFIl|>)_yRcc?Tthmu>(&c2@TpD?J~^k}__RrKnlUA*AZ^tXoJz 
zR)Y7*+91N*4U$$*@kqv^oKz7Z{K%}dg$QPo=`BJdEmlu!S|u0l>Pke9_E(?+)B_z+ zDI8lS34FyyIKbZWLD-b1z>1dGWHHrG8@^S-s0S~^%{g)rg??#tRb`jHD-~}ADUjh3w z%||c?unnRSQH(owC9y3?)K#x%QKH1;KZk{e+DAdCo?m@O-*|Ip%HUrz%~8P~@Grji zX?`N$Zl@k1eL%9-+Y&y;Ad# z-Ip%OKLr<*234JuVeSKkzN%t#3iP|g{=vE4;?*!a2aW)u`&j$_VPKAk z%)r2{Y0g(1?}pd6S9%Y zlq*p}(?*J+a_Gqvd0&Mcc%3pF-i|V8ZawGz1S5r8>;p$|Z`leOB*!CZsY#-FX}B1$ zMVa6HE*RRzTOZJ@`d?LPCeF|H7jGGL>Zh)L|S4A=%-3EO%7x4FPBD`*-PtKQC zC-vM79V&q*fgoZj{PC7HxYE`<#(5kP=U?4svw9rDn@qq+$YS8JPln`ZpRbH=Urb-c zZsA{zU!R}?zcO5n+mNsbw<=YsZJ_%T3)Dh#7d+P-$Njjvu7*Mk>TIH{!HNhv{L5^6n^xdtuqBJchPykb@}ju1(JRNmtGx?9#xS+vQwO54>KYTRDk zXRVa72DPg@ypmv5&O&JId)5*gXYJP-Y4Z@2;G&WNtWS){hGjQP8C9?oGMtAY*b8>< z3C&arA**8aYPgkpn0W5l1ndm2)}E!DZNm9Sh;<@H@P2#ts0~8ULU()eyp6hVe%4%Y z!T!4`9rFsqt4;B|zS?MiBi=cGU4csNtxBzq?3sCU8OQ2}X6EF%X6E3RbhQSkyT$#s zO2NovxXV!nL$?W>>c9Ln?z2NF69RBRHVlW(66VE3j{LbtGaqoy$?@=Vd>vHTfA;1# zjn}>viip*uav9b2pWPZ#TEs@Azdse|RI6`!L#6zD8~b><-W7RRwYk6X`Ek8JxR7=| zZ8tj_-5$~B`BT>u5mcnMYX8x1IthYTZ*Py&ULKc{iinbktW$=5{t-&~#akSzAuwUi zA>CbC&PP+O4@hdJ_zv#JZbQd-@@HSfcXlzaekeQ2D?zWHmeAZ%e8ej`i06}`4<^j7 z0{UzYkaz=;JuJjHYqw6ua z6awD>TTScLB!KI?x-H#xE<5P#Qfkn~wNtyEb+L5eV?+_%#o~xhK+*TsIsFOruU)`j zNzDH95>2{L001Ki0Komf-33_bIhyJi+t|D4*&BZ4CjWCIP^o5Pv)Yd6W3%-PC-|Z- zcE~PsTQ|x1BP>bK6=$_3s~8L%GEq=%%}|bXpL3w*vXe`^jzT0~%Tj1+&()>tRp8o{ zbNz4W$Sl|I^hL`nn^Q#B281q(!rpzla*QnELo8_$a-sS;OhSU+ZV+FG1rA<8l|(Zs z?Y&@SL2eMehcbs@Rl*}8ZH17?)N05r#OnQp1dt>K5J)9UHR}l}LGzZg_av!g5%~Gt z2I^VzNKl^fu75O(0>r|a0h<^@B7sz-Kc#`<1&P+IRh6fJr3Sdc*ythx<06cT?Cgrp z1tW_ghnW1&(j}bRMf(~F5UdH^Lg@t%pl7WDv**qdZCs5&=Sg+QgMM=RZcssj;o`+d zg}O#|rA5Wvt#|q+12Ur0`K%)yJ=b;HY5R=GJ&rbkB(%o9Vl=e@_Mi0}JIZ|pa zlj_e#hgekaRE~Ai&zi^i-kG(18tVCdFGY-CM&*iRKvC;sV7_lz=|{aXBk*R#O@>Q0 z%m-9UsMw0!7?*rc7B@|g_fI0~lPa@%4iZHZmEu<@p-%DzP(q}Z39xOA#_TR^B`fCW)aRj&T97q3zH6q?Hw)A~vlqxN z?Sbgevx;m_QG!&wR|9m_E7K>LPQE8V54XYLDBBYSB#flEYaN}kD{T$9-g$t&cr1L( z2ic*S8kb=5g$Q~sfSeZZo3*uQfr8TJY3s{Qo~(Xv+0VA|o|}p@j22K!ZaOSJJ!b!K z+aOaeIFm|f2uVf`qP?WJNL(tQ?De`_08l@Pt}gmS3oCHdoQX@QEf5T)BAXQqycOsN 
zKjk-)BoA8)8Kg1`!D8r#K_ChRKp?A~UT3 zn-E}!_Oo#)%4!#G`O5a|NMO*L$OhYs)_3XVf(lckq@Eju5vLWcJ6yH*YEoyGR+J1gMS#Rs~u`}Yfx+i+p z`FJ<@VQY~K03n)`_XQW+ED>_re)`xam2z>crh1s4XK4F&abiwSpBWCirr-)&I(jo% z&L`>oO^kwB<#XQnro!;gB`YziYa0l}*LhOI(_V+{xgte{Hep*YjQ|%%UiQ)*>ZIk# zP@{!LrA4O@{=~Lb5g!Q}4o%4?9uimPIJV(xL**Y+?=EkQp`QGzWi+b+g$&j@nw6;) ziq46Np?&^4L(X&f;`bKrYt?L5sSyO5bH?HteUj(s zEXyfBPKPIR6F9h@)nrr*YhHUt0@tQ4CJJe$;cLO5T+0m}ULK^I`F4#2*4N!t_Q|56 z!&Ou&DTC5y=A(WWXY_i_g~>zTz5M!=prHdUP6cJ-wCX@Up<9#v5 z)3~?dN&FhuHj7t&t{RKgq}Rt|!IRf`ZM%Nf2+NgppB+pyC`8wcobXew)vpDaiPj7yKCjXybu?rbauHlVR`s47{sX*}wV zNBSqAmk1XW#AORN*SOL;^@*2*Z>TxHPY6F4W)i#@eBZdA>)_EM-MlD!^=R8jP{><; zZ2UY@Jn<4KnNcB>3iy58w~4-lRFYiYbKJrCThrOR_xrET_qis_PGg96E*2Uf3EPG1 zNH0zYeRcSB?t!c1A6iS`u<<9tw$Zz7N9NTmb)SGI;ZfCS+u6H z7hxZvg(cok9bwWt_YV&xg~9PaG^4{l?xQ&2mupjW{5_^zceq{2-TXSC9reGXO87A1 zy!f|r7}0f5oAi#iV_Yp^!`higHDj*RC(X(d;})MnD#0%iVhGV@*u;gv#}wl1u_CAl z5cz(0X~$W(9F|UGvgsRjd!oCt%t+AH8NuwmrW$dXeV1TWk5RXWyk~n$jjLHWg%6Ks4N6Aampn1n&{X01SmjI>;Ay;?{Z zu`3mz&&^V5zGqrhilzr(z-$Vg%H6m@vs=w)&B0={HUg9=g9pu$2dqt@T^q|A+uPph z%Z4C^*OF=AoyZ9boU{b;D=Ppy6uA?gsm@fF9dNcYW{`AB#vB{c*ik{YU#ngh^jx%fYU6{?95(TG1|m6(Ts>5F_c|kt1GXX3s=UwQ}~9^B@8inepvy`C0iyu^ zGx6a{!WAY&xpT>aDOj%P{|84K6DI2L6{fh_4sP7en%q^)K!J0K7J^f&c&j diff --git a/hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip b/hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip deleted file mode 100644 index fca5bf52e36ea96d4e9d5a32ced95e8a2d4e3588..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14641 zcma)j1C(XWlJzaC%eL8NySln;+qP}nw%KKLnO(MR+jjl+J98YW|~uetokv)2EaD4~v7+|7rpK4+{fJJ!_NyWcS}a zA^aCRHd=aGhX2JHYa!D9)<=8n(}Yx;EN`JRfvI0N64)Ilgv`SZ2_#E=RHWN@cms`1ccdp%ucI z~+Zalfv+76jdx_h%w0K+I1Q zC>Agjym+I%C%m^)##iNKr!DdbLOahNT@&&zM!-4~YY5GyEFp4|NNl9&n34nK#2Y4*{P5J2c*Zm0gK$CjR`xFndfpMHwpM%UFy=(%vEVy# z5kiS$+pMqo^9EO;TDW~2?#(zRM+W5IZ~4wnzH-}E(!JJAXKT|B4qf-5cbc`p6<{y0P8oubUUE%_=MX> zA8kn=9}l68?3~W*DLS7F+4q62ENOeQxz>c)>$~;XcDyd^PhLIELzGZVsKl1^z1dYL 
zPZ;^ZyH9eJ*;}TX!@ak&6Mt_Uc?Y^G@e7&R(c#!gy5f|rG&c0Z%haMRtr5owTZ+?V z5(wZ)Hm=b4?nC&L1f+!a>_{0(ob$^1d$|3qv3gl}pBrk&=K9;I$Df1uruqfZoUiNt zcSarAo|)N_6>RtusUIl|8lx+2qQh@fO1Yr2$qzh%0Cy5e<&3#KOo*rLr;b^fIK zzJa(+mK@VM-b{-Of=D62jgoxR{Y>$Rr}%aONAVrkbz@>;n^4JDqQQ5wIE^V6B$E1E z4QIci-n6CAfW}S%gT{H}77A6osCVlbYxTP*EZ{MztX30@ws3%Z-|if4h&&~bcAnhN zku_j|!nu@UsGlwF5+UUv73Jn9n*R?)pJ|O9okNjTu$Y z^PtwWbRiV=m`5#|@a`kC*1O(xp0l$XBZr3+#QW1-{K{359z*7uVaSwN{P>^1V37Xh z$8O*EBB|Fbs`|@6bL}#?SRzfYlvz>aB3;V~< zppeNTd0t#kvYPKrzn+WFJ@=_}TksX#!z8m$0i|EEZtGm=Wt2&+A zDqB_n7JIW3>41OZxMe9^hFQ1qNzEp+Zdb#f0|zQ{&8Is9>P#40B{1)V9|9Rk_N8v=yDW`H8)K)?@#EnwLq zNO)j=SC}73J6w>8{#>Dn{#@hoE*ij=@_f_9%|}f2(=7P$)l>Hejp(6p?t&QFUvEB1uN-lB=;0sMcu&f7_R zD0vgeF^|G)aBr*i6+?P2AMG*@IfI>7$W;#Woo4_N>2Oj;#)_W~c(hF0-1;yXPV&KreV`$#4C zhDdK2```sZ*aq2twVJSWv%GKxUF7==v@-5bw>J_=L?wIc1wo~U^1Z2wu(C6i;V?C1 zrRD1Q-8#;)%jRr*mubxWJw(6541`Gu!AIyRlC|Mk;pL8>(nP{n0(?$!N>Mp`BtvE* z(!Ns3nYlk)k37zJKi2o5G;{b6`0Z!SM>+662nzl4f<-bR92n-(o|QC@=K^VoEDJWPBHWN zDfrS$XG6-msSYWfUL;Xf_vwqIKc!Gr-${ZTtb{GNS~nQPqs01`-AyQ1$WTL0adYQV z6*Fl$+%2iMt_bC6ZW9L}V@*y=tvHhdm!Kk>e)nS^uT0Xod}KDHCTP&TW!MY8nuBrV zMdM7fYVb=J6N~&lRgS%dO;M;yPHr@Lt~w1*&YzDtkEgQMayySuRC0^|Fppd8^i)bB zKW;*EJBQIwa^rkAS#whFFkaJB_cB_OQs*&T<2ZSb2g7RP8jkWpR&sM8Z>h4$r@Fi# zak-aL++WYy5$YI>Iqhf4e&X&5^P}wW?B24chp8-7e@11%sWU9r<#~z2+A`| zA@dg$7$#a#{^@#eC>72hsqr3@#goBjbgNtotSW}B@R#{{02jXzM5$ZC zn|e2Tc2;%3?X8&~Y@Qi4zoal+T zyUC`xsOm~n`j+qoBZ9#mwHAkW>F#&FIZ(FSq)rSbo#SDS7?!I7Rrr-5xs{)hYlYgLq(ZG?~+FAq7E~=7Y=$h!}r9qut{YUr75bHF+dfrTVo#|`hg<)ZoDhlO# zM^I{}yu6~MpAvNfzz569O#4((qpn7pT751ZMJ~y`kManYC?y#c zy8}Z;UN{I$|F2rIn%jyse#b+xWm(qs-qhhe7!51CtSn3F=K6?nlHQL&$%D$;?g{A& zg4X&BRKpZZiu6&7GY-V$q5S7s;E~#Ife1j=#<462SnjnJTnki1 zZBt8GOq`T+aYS=*I47!zC#rDwf`})>;3y)Y%1qO|ehpA64O3L*KnKi280tbg4JYZC zUc|9mJVKY}OWLiJ?WI%_@^v*fn)(TJdUq^_m)+^J$=F;&YXK`Te#&-xYG!AXX5SU! 
zI?RI)@ILA=N3qVTk}qJ}TTgsubA;)fhi?ztK4=TpR~ale?m72i5PuW=&OF zaHC)G@M!RhhhmA}WW#DbQF*ad%|W-=X{olLw`%7yub}PJ6CZb7%7P*{)`eLSx->2K z;<^qi?^jbPMIKzcf{G3Xg_ZD;kf0AIG)O^4{CFLl7il@W>-hvHtm|MF0}bQ#e1YuM zLx?-M)umo6_UUMuOY2?vRh|hfnESC#2i%3wxi_bJbx9i)FKkuz&G z<0E}^)O`?i6z#ft;`da@4euuBnFqYqc}*R@vxUlb<{rT-9!CpwtIGlnse7Inu6uK} zJ|kw3L>Fx>MN`Gcw6bZ56tjC(wxsQoidpqv^-t$9!+4IJA#r9EEL@+-UB+IwvRoQm z9O!dy-M6a^oC(QIYt_3i8U3c`gB_7|6}z&6)(c)keo0E!Xud;!Oj5dc6THtF4_ zVz3@80%}>s;O)xejkjzZ+?Fv>!deQaT(B#G6eNcbz5A6wz#H=+{D@1gg&h6lwfUUHXJD=XdfPRYacue2NH+OCeb3B^+T z(_VaT1?LBQcu4xYtVJTDog$(cf_jsUEN~}HEkhKOxpADvzfSlDdh+oVykq+X;Rl{7 zHAZLx741IOh3@f53f&p@MWB9749w9{v|{A_^1Lq;f=_e4%`Wh8!4Op5Wc3QcV1lgG zt~6gOw`B5Lf3O;ei5q)iJB1Ll90W#}7*t)NgEi?QdE4FFv3K_t*6twb ziWDZkIg-15)j3`qB<@Sbe&ysy{vbNbEZe^hMEgCyWFz2F7JBfMy_laXUb3)+%GJ0> zkKM)MEaj)h2E$|pQ*VuCtd2%@j=12Tg(6q%yNqR0nJSY+k30Aq*;ip1H`?IDeQOS3 z_P|MrKWriu$lf6XDQkL%T@9j~k*B;4PT7c4d3`J@O22~BUs6iH0?R*=3Cmy}(a<_h z7%hWFM{!Uboz7JiE`4BDXNKbDs!{V z^ysQgC6U-XF^=+Jf?4YkH7C$Cjlx+GZ__oA$gc;#gd&v$V(su5lyUhF(9G`SjwVni zE-{jN!mx`J*t-lA%}v8i_%h|UGlS)*vfgdQ!N&TWC3m?^l+LMhStG+UztCoxhhM!i zMl%z7+b0#}vX!YKk@!oFqdJ)47T5Jp1^L(-eI+h?0b4ZxL>V2S7 zjiiZ5_fgwj+QbAw$>moE<)mG9!~E58$!6!>m|h{gvnzgwcUsi5XZ5tZk`@rzvo1A~ z4w?V3`nrG0w2RArIFDpxojd`@JzOPa1uPBAQ2YQW3C7rMukYw$OPgA_sTQ6Xj!95%euiNf3$NGsKB`3 z4#|j`zFxU6$6}=ev@wcE)Qdi5K82$mOVPfw6@OBn+aBQ6JkEAHfxKu@Vg`=qUkyYl z*f+{JkuA!hG&_ry25@h};?zVVE%Vp#5uqS@vvUGUhyT0*JVh|0Q=D&%=v#4dqlB+q zo^J7$eT0OYve~fy=;WICEF5976!Ez$GXkC=zbE|yuzKKmi=uU=u(sl~roFvbOxMBx zzL^1^4sN-&dh_=-ARql>Etg>D8Ou%i8J{EEn-d+(hYRoHexsj)PQ*jrMz_1p->%{n zee3cJw=7d|v)O^3gjlO-Fzu|Fe?9v;t6E=i)YgVa0ws0aq<`Gz0&Ol)-5@(-J)E8B zbJ?GW=dq6`g_yH_92pVNC*8y~)PESrX`s96ema*2b)HMG6Z(t!l1|)lWCfuzj{TEY z{j?%b&gK!3A?xgD`18>+WCBZ@<>hiP`eU&7;e4u05+&UZc`^=l<`H92Jt#VLW61pI z^=N(YB;?tn+)c6k8}j2L>-|Ez0urUP9+gjRD@hPtfi5M8v+A~Bi-6%KgLdV ziN<5fAGh)1uIz>Pdxy;w4^vN6H`5F2ULE6<_)|EG4QVRj)CE*hEPV|ZHF$=Fb^|5Y zIy~bjVWidMLCK3BtUSqSpla~Q%EU2N<3=5zYQ^}5oGNzaY1HfCgOODTY~0D|hKyM- 
zLJg9QG%5FphQIj9W?N8|Mh!J?RO!;FS%wWcUE0jkEH!9PNr(v%59<;h?g<6p<2 zRTp^BQY(gkkEkUwE-z><2*iScTQF9p;}m`Qin)&)22Nl}Q#Oi|CtFP)b%$aMkX(38 zr(P1jVEcZ$a+NxqE?GV#L6cxLYUBl)_UB8Jm-^C%4LS@N{{nVosgtcH{{?)PELLDH zMKRb`mhY0Q+->c`5{d~&ELM0fMbTZoQGvgnFI}VVfr0Muu1AJO3NmzOaTAHok>zI0 z?p)>bz8Q6>6P=yE?G+$bTV{IHJuZQ$O2DN(n}S;FE%@Z*f9sEr$3!8wZ`=&nX$Xx1c4v#2`4>_ESr^ zymoOLD=gHn9WP%bS@Mipq$zB!D_nEep8=42pwo-_gD~tA%Z`W6C!5U>Tv2Q>wW{uF zd{pI1#lS}2{4&48{q}HVx2^o_zlxv~_xKa_^b;xKT+Y zqa1Oz@w?9^7M|OaT~2akU+yIz}{^v6Q@T1`6giw7?pY|v{(v69EDu%7jHhBp3@0P)_2 z7qM4_s9sf_%}c4awhW=hW7NTJRGHe=>(ccU?C~0*r)!-tS*eIgI=_ZHqz$#Hxft9V zdcaCO1!iIwiDLq0x<>QurVUi(M(&eVO66F#;KNTMPd^DA(LOra=P-Qzojf(_tVaDF zH6~i@hOCBy8M)P4XF+fCf<^(R2`^0peB~~ahgR4!@0S^KxT(1@%;c^g4Y(5u-M#I- z>2|kM6ME-66Mn{FuD>0O7wy7BO~!v7PlZEHW*{zC;p^6YdCzFnuTyh?VJob{eBH;l zpk|Z!?rb%vUeu(F)aDSOPN0!Uj(gA@S zd7u@rfV`6oZD58ozJo2+Ue%j%y`SXdt*4cp?Sn;6Jt-{@xCNjE`VgumMpQ|sNO zwLON^k)ldFvdXY5LH&5;T`{syBzHb zwNa%@(L$lCX#L{-WYF0bUZnbWmlCW#K@unfsoXdHuQ6n&D2DoE@#;;sER#-q#KxDB z0-a-IL?~LZWk^m5j>2f9e|(wVhiz_`wK;V%GS%MBAwD)&tU+U~GGB4pg@dLFzd-I5 zj~sQ|PsK~62$kcs2{HD*Q+AMx#@iRH~l z7h|EG&3@PX4;r;^xu8`J?L7w##Z~~ zV8>G*+_-4*S62_tOFb=P23O^tmo5@n3}zOcOQ1!PBh6I*6oe$lKdUoqrJU&9HrTe( zwn$l)rzzJssna7y-i<)&;2AnOw4l`m0C0sZ()?shpBOTF##Kgntjt&^EyIPc2 zd2cyy1LojqqW!GdB+SAx7K-fp=L-RJUndUz@cZ8UtP$;W%e3W25i^aa{4R!W5Flcs za6*A(TINAjxH_BL$X-IkV>+Zz@xT_$JCsC@HVUf}>#FUQ8XA*|IQcA6c|;`&9l_-c zp|LPU1+K#e{nsf%M=H2kjlLi-R$BgpP>B+HlL0yF#+^DkoV9AV9v7Q4SuVqNCQ60BC0 zw?WzkBd;yoAxTck?7NtG5D+1rR>5)2r#n_e_KM<t4Z8JLolbR{JuiolwT%(;tqUdKM_r46|1zCf!(gsBU+I z&Wm63HEfs;>>Zx#KM#+=RS6`e#VhPM7)cj0YM>>?KgY*`Nr7dAR9q=CzyHvVJH}ko z|BOrVU8K#^i9>w0xq0ZlL_XbpO5`Sg?VqGGf9pDUn`Hv}JkkZNxXV$0c3r1L$o1$E z9EUY)F3updIZ1AxX6hf6x@MKUw$%vN=8tFrYgjv5@@-@pB|qi5E&Kqo-wSAv8*gzN zBjFa}U3z}?`yu<9&GYNMolw~JZ16hRN$94RPe@7Go2^`>GnpP_)FQtfO^t@3Lp!

j10_E1wk5$p#$(ov;US^kS5j# zweOxBU^oHr6aa`n1Q^Bu71p&)D%wA$ja=|5T(6tt31kMTVp?s`E&$RQz-}JUK5mZ7 zPiAOGFMn#N6EGy&to-==&Dcb}#q_qJl#82X*jS8f@387>Ph;^Uc{8RV5E>6+kLc9f zAV{EmmD|=^C3YFhHN$o0e+Sor+tFL5auIuy#8u(G4_B$(Qoha*8dGs$Sias?=MKhI z)vBShf5ENU|KMqsp!B;g`GGnbybbl?LfgZ`ix({YCQWFRlLI&BJ$up8KJn|wQ@X17 z(ByBYp3gr2Op1U6tb8RRSngOiRKoxO1!@2Q$Nx1E;VZY|AGrvIMql{}_C{Z+2o0JR zHmjUS@7X&1h228d+q~A+wcWg}zS!TRYNBzMq}$#NkNB6r84k}Aht)5Ab_$3$lByLN zCtAlfqIpehkHzj72=4c;PUi4+c?>2?&^hMM&y0GRU_vh*#`Nq;d0##u6P}0m>&W`;>Th-JbMeq z;)-j21acdpFQ?bvx(%~`YR&IDkSF>R>EE2bH=T(Yp0ZD3G^dmPbYlj@q91hrHvDv= zV9l=(oI$QtH~BROa?A~;Quqg?G0Y2b=(FXU?d`n$t!=~L8KAYne&zVvt^5dNy*sEwLs|n@T??c(1MmoNn zX|zjAoek)@ZK-9P5-h9f5*->A&Q2gHy>t&t+HV-5l@=ZhaU+YfnAmk~+m?Dd>oFlr z&Mtgy851=0`k*7o9`-x|;T0Mdb*}KJ0+{ukf50*}zYQ9wWh7#61Yk3nQM9FRyr(Yf z*r1&mbKo*IlwDQztdRZ6BKJ z$&k+N5X=fWw|9DT;?1lXeNIPwn&D+c zBZ7t^(51RTonm4GpdgCw$-X{}BuC`x;xzf>;L@NSt+j{3&EBpr&FjS%1m!3eBPz_t6+-OVVOQL|7hXNFW*0)UI$1a8;Li;^FV7IAC zTRWcWCZIQ^s)a1^+*cqYIwBaqgHjv*6Qw%DCw#HDOA7DdM&{c6sl=7%O@TYpCkamL2d$FWx)cc*i^KvY zN7hY#5l--Bd<~kczi@BE@Tqe1#_pG_e#z`WyLNof)Shp4!Q;=#>9+jCEe+a8DVh`! 
z`xQlvF;1FG(_XlIUN~GqFM7_R~Ig zsjeCdxze>SHJSSN{OtWFT2*?pIbpRhO}=qx=(>f&U>Ft-7Z2%ZsMUkNOI>-2h58F* zUb&&T$Cyd&N}X~+!;cWlp=5ubF)80+eh)+G8`*vrZhLQk2WcRe{ZX+;Du0diJ#?;j z#7fo!1x!#3oY5^vHj%wj7gpuWskNz*ys#9W@>0M(FMejMxzbvHoTmybY5=?@2$6TN z5H5X=UK=qI_;+VSlkfDB)mvcg2LqaSP*Qm52Eb!yb)T`)^I>@6RJO?e8eXo*c0Q~k8zTP2Ko@IuTik0Srbc%^Q!84u&^Z|gL( z_0EQ1Qnp4k|JC!N?A7zK?DG32JlvM71Z*#LNz(6ZQ|1Bo7=uveTEu?McHXIn)DAYAwuRR1IUHWcohwH@-px!@JpNNUgb7ua*1Az2zqb4Fk!(+igMGd zhxiOrM6aT!*hVL{5Me7$qlE@+$2CK0T%A7lCqFg()|ydb256<7*(|(DG+^!{_viCnHe4HCS?Se#h=~>QZA+&5!6e-cjf#+0O)Cz)|}Z zus_p$1akn}AQ}LF6zCaX=VwLm|68tYh6MW&QhZvncBsA6L8mF@SJB?VA8S}$F25M zcBRSQT~&S~3@5}|Ya6xHM)kzuVK2YeZDmJG;zf0^Noa-%K4YPCL2q3zTweKGW=7>l>C6b&_ z+W;Kg&mvM0^~g({@2V@S4>)y_H1Oe1V+vjW5+yWkq!=oPo=lPVRoH>oDZ}CID1+wKbM8+tQn4eN0gY+{2tvXq;+xf{u$9NS$q?)Y$QgH4B-7Dt&{uN-fA1#3>qh$I zd}(!3&)v|W5_l2_B9_7*Z)t-oZOvnx$02e4)onJb$05AQ1dN0%1|IulNRIaT%INmR z^i}K@{?+*P2`ca_!_~MA35#&6QkB{Ux<9c%EhKlrbIozwkE`oyD8!)7CQ1q?j&bwd z1j>oBIn}b+g9DJ15)>w()@_iznjuAuQ0sY6wm9cjrKRYD2d2AW$5Q0p_E^|#i1Gk z6XqP!-KFJxH0An$q-Kim;C}2jbc`o|_CJ{kI8 z!u%?r|MqV`#M1FG?AlrrhoHNtIMr~coIm$S(+j2OHS?iXi1L?UP2*Ynt-m4=cb;VC z0nKvofq*kCepf6(tGI)#O*C!q@+Iq&|FY#S7&C{KmM0%K2g@#%35)8Py`wBJ_ai?+ zJ}-$Z^Zm#D`W)tRF>Z#BLN%%Fum|K_nY4M7%-rlY(DOA?N`(jXFApVWbSUjg@31kt z9)n9E@C~rlv|ddDxW22~(p~4WgU&9c25nqBwd+|IOBX&y6wzHQj`#!=eQ%x9pFsaw z1>oeRNc+!AH0eSC0E{F60QdiO6=0?3XsTmuWACD8Z}^p){Lh6zrJ9Y+YCEEj&DJxV z;ETT4A-l|N-6ZFaup~iOoYk7FVlZ&XL_xJRLpjoY&VicCPA>5}3Xyy*OQESfSC_6= zfooUJ^}nSfvs}N^7cH-BP7z%j5V|M|d-v(eF|vpcv7|}Jh3e-p2?>6?L3|w+ICuqB z63wKv_kxuLxk2KfUV7EQMaPN^~+D~K~4U};KLG?ucWe1l+CL>5Jxo*P zNU6C@sy`bYVo|+QIo3@-YaZu&XV&&fET=G3x+%!GjSLqmJms@~YvywRBD41%?ZHt+-C3fY)uCHJvhCGGZ z1ioCwWE;sw>>y%U#pWz}IH(@=gtjeK2j|XMnwsf-$)bvAI#LH!^H9nx@H8*6N zd#_Mc_zm;$A^3VQyP-9;iuTu(uzG^qlzz`f$3I~V1Tr1Ry};jECNCUWfO~6%*Q$TS z?k6y%<+21iXcI(&af)A<++M<5UYiMJ8zqP85_k z7y}9e1czs&ZeI3wsMMXd4!!rsi!mnOEnC$A9D%mz{G636WYRz_v9Sv%9dBteB%ypNBeXLDt~-u9-sLEKv8% zULd=)2ckdEDzZIA2~zQ14bWAuOrK;r`JMnh+y;lEY)=%BFp}b~b#%(Ev^C&*=K=cS 
zvG6e;WQS&IT!P6LBIvmQa$3A^*4Cm03QC)&tuH%yviiMcKikH8ZYs_&T0kwi>9F|p znEk_TgG{;LOe&!vBpEq~_LAZvajAf^*XwctK>Z}Ty66)vtiV-sCN812KrobwY*sMv zR-hmJl;22_JZvpwkjg9si=iKqM~P+rG?t8#{+#|%OFcKA6R&9>+TAQcZ%i>8CQGJ? z%(Mb*LVz9G&&HuBt6jL|E8DLlfkAH~8*DFH-=&)i3Y{(q)_+7$uN#fVX{MhcyoEd) zcdE-^dZlPdir6-G_j`bX4Z|==&zu>0_T%%Ehso>S2DKq3zqni8(!eW;p1Yf-7w4 z=*?s~pQQ6QF$!jt&w1mU3d28_ti-6UZ6FX|`$-K?dmXapiWC*vgl)Yv0$dz<*-Lk* zla?n#jTRb}7M())6Wdlrd?aW%G$o&SNL-oY*oLbOm48gVySy=mdh)B5(X0j(GFaeo(&hl zznoZBTieOFm4A);9J|}o`sDaHTaaeALbo(8*zS7~^W?FtT6OM?c{YcEVfrS%*A8%* zXj~wYf+l``s(cvQ%64!(=DuM;%vNVTpc8G~-6^SsXPB=D0?C5l1GXAW|ih`N*} zF|j53xOdp$%w1U4^o>quT${R>D5RN&uLXm0EjM_0d5~`A+cgqcUw2p8 zCyR;>S5c{?3`(DwkNRDl(d#uACJ%l0^6OKAh7Pzm6_kzBsss6iZcX-+_ubk-_4fRw zk+*nHOE7RgJB3w`qmo3~}<4Wt)CteP|q2~NPA^c#NN$_IuedB(vgGY;W^P=q4qirKW zA#eGy@$*RW#7m@PMuku+;P-LgCi)UmNpgA5aR=vbO=t7o@4q_V=bA7(jUn2(SZIJG zY!|L0y*M58*&Xy*3%peaG1J$pt#bO7AQ;FG=*b7@`+vQ*9(7b(fVPZV?uNwcoWeV1 z(VE6ygnfh-mUusPgnieFfc7i6B3vfrmbBf$h05GkE>MUg1WcHft5xqHa(ydTz*8KJ z(}kX2cq5N?gc`p)O2y^HNDjEl!lOz-b7r2^8M;%Uwjx^c_n30s;dUi=^Xr6m)c=ku z;lqgY;@`?)MAt!W(mURcakYdEYiAzSjJZyqG%HJtTYL(s1iwUxAw-*D6Bhy>Q;4(2 zil8DubR6mzPHgl-B1=h7<%jSXm6+JzHv}_Y1F2u<>n7Fl0Ys5(deewE`J3(sHr& zY9V37u2g_NH%qDco@rGnnjU}wvng~ccjE@lZZ)4Z2aD0#2vDL79yCiHur`HuZ7gqW zZ+oXN8-f^KOQwN$A}1_x(h|t8tN`p#Ws{S|wPP0I>Gka;pXcN~&Ga`D#PuDlAf1k0xQ z9cpO}OL09nT|y~lnVR2a5g}xwq5+gAiN6UD<5}*WQ~;Jh%LU<(*Uu6|racck&pCU} zq=|E$(Rs8L%vI)h;kc#%wCOPN@;G{51?#AI{t-|uTp909;TuAiOmz9Cs|NFdE-L{9 zi~{(diEm%ix3BUSaqX}2cN*Nk5dV${_jk*`5Jdk#{Ec+>MOph7?%xsP{)H3&FSviw z98YW|~uetokv)2EaD4~v7+|7rpK4+{fJJ!_NyWcS}a zA^aCRHd+Q+`v1imYa!D9)<=8n(}Yx;EN`JRfvI0N64)Ilgv`SZ2_#E=RHWN@cms`1ccdp%ucI z~+Zalfv+76jdx_h%w0K+I1Q zC>Agjym+I%C%m^)##iNKr!DdbLOahNT@&&zM!-4~YY5GyEFp4|NNl9&n34nK#2Y4*{P5J2c*Zm0gK$CjR`xFndfpMHwpM%UFy=(%vEVy# z5kiS$+pMqo^9EO;TDW~2?#(zRM+W5IZ~4wnzH-}E(!JJAXKT|B4qf-5cbc`p6<{y0P8oubUUE%_=MX> zA8kn=9}l68?3~W*DLS7F+4q62ENOeQxz>c)>$~;XcDyd^PhLIELzGZVsKl1^z1dYL zPZ;^ZyH9eJ*;}TX!@ak&6Mt_Uc?Y^G@e7&R(c#!gy5f|rG&c0Z%haMRtr5owTZ+?V 
z5(wZ)Hm=b4?nC&L1f+!a>_{0(ob$^1d$|3qv3gl}pBrk&=K9;I$Df1uruqfZoUiNt zcSarAo|)N_6>RtusUIl|8lx+2qQh@fO1Yr2$qzh%0Cy5e<&3#KOo*rLr;b^fIK zzJa(+mK@VM-b{-Of=D62jgoxR{Y>$Rr}%aONAVrkbz@>;n^4JDqQQ5wIE^V6B$E1E z4QIci-n6CAfW}S%gT{H}77A6osCVlbYxTP*EZ{MztX30@ws3%Z-|if4h&&~bcAnhN zku_j|!nu@UsGlwF5+UUv73Jn9n*R?)pJ|O9okNjTu$Y z^PtwWbRiV=m`5#|@a`kC*1O(xp0l$XBZr3+#QW1-{K{359z*7uVaSwN{P>^1V37Xh z$8O*EBB|Fbs`|@6bL}#?SRzfYlvz>aB3;V~< zppeNTd0t#kvYPKrzn+WFJ@=_}TksX#!z8m$0i|EEZtGm=Wt2&+A zDqB_n7JIW3>41OZxMe9^hFQ1qNzEp+Zdb#f0|zQ{&8Is9>P#40B{1)V9|9Rk_N8v=yDW`H8)K)?@#EnwLq zNO)j=SC}73J6w>8{#>Dn{#@hoE*ij=@_f_9%|}f2(=7P$)l>Hejp(6p?t&QFUvEB1uN-lB=;0sMcu&f7_R zD0vgeF^|G)aBr*i6+?P2AMG*@IfI>7$W;#Woo4_N>2Oj;#)_W~c(hF0-1;yXPV&KreV`$#4C zhDdK2```sZ*aq2twVJSWv%GKxUF7==v@-5bw>J_=L?wIc1wo~U^1Z2wu(C6i;V?C1 zrRD1Q-8#;)%jRr*mubxWJw(6541`Gu!AIyRlC|Mk;pL8>(nP{n0(?$!N>Mp`BtvE* z(!Ns3nYlk)k37zJKi2o5G;{b6`0Z!SM>+662nzl4f<-bR92n-(o|QC@=K^VoEDJWPBHWN zDfrS$XG6-msSYWfUL;Xf_vwqIKc!Gr-${ZTtb{GNS~nQPqs01`-AyQ1$WTL0adYQV z6*Fl$+%2iMt_bC6ZW9L}V@*y=tvHhdm!Kk>e)nS^uT0Xod}KDHCTP&TW!MY8nuBrV zMdM7fYVb=J6N~&lRgS%dO;M;yPHr@Lt~w1*&YzDtkEgQMayySuRC0^|Fppd8^i)bB zKW;*EJBQIwa^rkAS#whFFkaJB_cB_OQs*&T<2ZSb2g7RP8jkWpR&sM8Z>h4$r@Fi# zak-aL++WYy5$YI>Iqhf4e&X&5^P}wW?B24chp8-7e@11%sWU9r<#~z2+A`| zA@dg$7$#a#{^@#eC>72hsqr3@#goBjbgNtotSW}B@R#{{02jXzM5$ZC zn|e2Tc2;%3?X8&~Y@Qi4zoal+T zyUC`xsOm~n`j+qoBZ9#mwHAkW>F#&FIZ(FSq)rSbo#SDS7?!I7Rrr-5xs{)hYlYgLq(ZG?~+FAq7E~=7Y=$h!}r9qut{YUr75bHF+dfrTVo#|`hg<)ZoDhlO# zM^I{}yu6~MpAvNfzz569O#4((qpn7pT751ZMJ~y`kManYC?y#c zy8}Z;UN{I$|F2rIn%jyse#b+xWm(qs-qhhe7!51CtSn3F=K6?nlHQL&$%D$;?g{A& zg4X&BRKpZZiu6&7GY-V$q5S7s;E~#Ife1j=#<462SnjnJTnki1 zZBt8GOq`T+aYS=*I47!zC#rDwf`})>;3y)Y%1qO|ehpA64O3L*KnKi280tbg4JYZC zUc|9mJVKY}OWLiJ?WI%_@^v*fn)(TJdUq^_m)+^J$=F;&YXK`Te#&-xYG!AXX5SU! 
zI?RI)@ILA=N3qVTk}qJ}TTgsubA;)fhi?ztK4=TpR~ale?m72i5PuW=&OF zaHC)G@M!RhhhmA}WW#DbQF*ad%|W-=X{olLw`%7yub}PJ6CZb7%7P*{)`eLSx->2K z;<^qi?^jbPMIKzcf{G3Xg_ZD;kf0AIG)O^4{CFLl7il@W>-hvHtm|MF0}bQ#e1YuM zLx?-M)umo6_UUMuOY2?vRh|hfnESC#2i%3wxi_bJbx9i)FKkuz&G z<0E}^)O`?i6z#ft;`da@4euuBnFqYqc}*R@vxUlb<{rT-9!CpwtIGlnse7Inu6uK} zJ|kw3L>Fx>MN`Gcw6bZ56tjC(wxsQoidpqv^-t$9!+4IJA#r9EEL@+-UB+IwvRoQm z9O!dy-M6a^oC(QIYt_3i8U3c`gB_7|6}z&6)(c)keo0E!Xud;!Oj5dc6THtF4_ zVz3@80%}>s;O)xejkjzZ+?Fv>!deQaT(B#G6eNcbz5A6wz#H=+{D@1gg&h6lwfUUHXJD=XdfPRYacue2NH+OCeb3B^+T z(_VaT1?LBQcu4xYtVJTDog$(cf_jsUEN~}HEkhKOxpADvzfSlDdh+oVykq+X;Rl{7 zHAZLx741IOh3@f53f&p@MWB9749w9{v|{A_^1Lq;f=_e4%`Wh8!4Op5Wc3QcV1lgG zt~6gOw`B5Lf3O;ei5q)iJB1Ll90W#}7*t)NgEi?QdE4FFv3K_t*6twb ziWDZkIg-15)j3`qB<@Sbe&ysy{vbNbEZe^hMEgCyWFz2F7JBfMy_laXUb3)+%GJ0> zkKM)MEaj)h2E$|pQ*VuCtd2%@j=12Tg(6q%yNqR0nJSY+k30Aq*;ip1H`?IDeQOS3 z_P|MrKWriu$lf6XDQkL%T@9j~k*B;4PT7c4d3`J@O22~BUs6iH0?R*=3Cmy}(a<_h z7%hWFM{!Uboz7JiE`4BDXNKbDs!{V z^ysQgC6U-XF^=+Jf?4YkH7C$Cjlx+GZ__oA$gc;#gd&v$V(su5lyUhF(9G`SjwVni zE-{jN!mx`J*t-lA%}v8i_%h|UGlS)*vfgdQ!N&TWC3m?^l+LMhStG+UztCoxhhM!i zMl%z7+b0#}vX!YKk@!oFqdJ)47T5Jp1^L(-eI+h?0b4ZxL>V2S7 zjiiZ5_fgwj+QbAw$>moE<)mG9!~E58$!6!>m|h{gvnzgwcUsi5XZ5tZk`@rzvo1A~ z4w?V3`nrG0w2RArIFDpxojd`@JzOPa1uPBAQ2YQW3C7rMukYw$OPgA_sTQ6Xj!95%euiNf3$NGsKB`3 z4#|j`zFxU6$6}=ev@wcE)Qdi5K82$mOVPfw6@OBn+aBQ6JkEAHfxKu@Vg`=qUkyYl z*f+{JkuA!hG&_ry25@h};?zVVE%Vp#5uqS@vvUGUhyT0*JVh|0Q=D&%=v#4dqlB+q zo^J7$eT0OYve~fy=;WICEF5976!Ez$GXkC=zbE|yuzKKmi=uU=u(sl~roFvbOxMBx zzL^1^4sN-&dh_=-ARql>Etg>D8Ou%i8J{EEn-d+(hYRoHexsj)PQ*jrMz_1p->%{n zee3cJw=7d|v)O^3gjlO-Fzu|Fe?9v;t6E=i)YgVa0ws0aq<`Gz0&Ol)-5@(-J)E8B zbJ?GW=dq6`g_yH_92pVNC*8y~)PESrX`s96ema*2b)HMG6Z(t!l1|)lWCfuzj{TEY z{j?%b&gK!3A?xgD`18>+WCBZ@<>hiP`eU&7;e4u05+&UZc`^=l<`H92Jt#VLW61pI z^=N(YB;?tn+)c6k8}j2L>-|Ez0urUP9+gjRD@hPtfi5M8v+A~Bi-6%KgLdV ziN<5fAGh)1uIz>Pdxy;w4^vN6H`5F2ULE6<_)|EG4QVRj)CE*hEPV|ZHF$=Fb^|5Y zIy~bjVWidMLCK3BtUSqSpla~Q%EU2N<3=5zYQ^}5oGNzaY1HfCgOODTY~0D|hKyM- 
zLJg9QG%5FphQIj9W?N8|Mh!J?RO!;FS%wWcUE0jkEH!9PNr(v%59<;h?g<6p<2 zRTp^BQY(gkkEkUwE-z><2*iScTQF9p;}m`Qin)&)22Nl}Q#Oi|CtFP)b%$aMkX(38 zr(P1jVEcZ$a+NxqE?GV#L6cxLYUBl)_UB8Jm-^C%4LS@N{{nVosgtcH{{?)PELLDH zMKRb`mhY0Q+->c`5{d~&ELM0fMbTZoQGvgnFI}VVfr0Muu1AJO3NmzOaTAHok>zI0 z?p)>bz8Q6>6P=yE?G+$bTV{IHJuZQ$O2DN(n}S;FE%@Z*f9sEr$3!8wZ`=&nX$Xx1c4v#2`4>_ESr^ zymoOLD=gHn9WP%bS@Mipq$zB!D_nEep8=42pwo-_gD~tA%Z`W6C!5U>Tv2Q>wW{uF zd{pI1#lS}2{4&48{q}HVx2^o_zlxv~_xKa_^b;xKT+Y zqa1Oz@w?9^7M|OaT~2akU+yIz}{^v6Q@T1`6giw7?pY|v{(v69EDu%7jHhBp3@0P)_2 z7qM4_s9sf_%}c4awhW=hW7NTJRGHe=>(ccU?C~0*r)!-tS*eIgI=_ZHqz$#Hxft9V zdcaCO1!iIwiDLq0x<>QurVUi(M(&eVO66F#;KNTMPd^DA(LOra=P-Qzojf(_tVaDF zH6~i@hOCBy8M)P4XF+fCf<^(R2`^0peB~~ahgR4!@0S^KxT(1@%;c^g4Y(5u-M#I- z>2|kM6ME-66Mn{FuD>0O7wy7BO~!v7PlZEHW*{zC;p^6YdCzFnuTyh?VJob{eBH;l zpk|Z!?rb%vUeu(F)aDSOPN0!Uj(gA@S zd7u@rfV`6oZD58ozJo2+Ue%j%y`SXdt*4cp?Sn;6Jt-{@xCNjE`VgumMpQ|sNO zwLON^k)ldFvdXY5LH&5;T`{syBzHb zwNa%@(L$lCX#L{-WYF0bUZnbWmlCW#K@unfsoXdHuQ6n&D2DoE@#;;sER#-q#KxDB z0-a-IL?~LZWk^m5j>2f9e|(wVhiz_`wK;V%GS%MBAwD)&tU+U~GGB4pg@dLFzd-I5 zj~sQ|PsK~62$kcs2{HD*Q+AMx#@iRH~l z7h|EG&3@PX4;r;^xu8`J?L7w##Z~~ zV8>G*+_-4*S62_tOFb=P23O^tmo5@n3}zOcOQ1!PBh6I*6oe$lKdUoqrJU&9HrTe( zwn$l)rzzJssna7y-i<)&;2AnOw4l`m0C0sZ()?shpBOTF##Kgntjt&^EyIPc2 zd2cyy1LojqqW!GdB+SAx7K-fp=L-RJUndUz@cZ8UtP$;W%e3W25i^aa{4R!W5Flcs za6*A(TINAjxH_BL$X-IkV>+Zz@xT_$JCsC@HVUf}>#FUQ8XA*|IQcA6c|;`&9l_-c zp|LPU1+K#e{nsf%M=H2kjlLi-R$BgpP>B+HlL0yF#+^DkoV9AV9v7Q4SuVqNCQ60BC0 zw?WzkBd;yoAxTck?7NtG5D+1rR>5)2r#n_e_KM<t4Z8JLolbR{JuiolwT%(;tqUdKM_r46|1zCf!(gsBU+I z&Wm63HEfs;>>Zx#KM#+=RS6`e#VhPM7)cj0YM>>?KgY*`Nr7dAR9q=CzyHvVJH}ko z|BOrVU8K#^i9>w0xq0ZlL_XbpO5`Sg?VqGGf9pDUn`Hv}JkkZNxXV$0c3r1L$o1$E z9EUY)F3updIZ1AxX6hf6x@MKUw$%vN=8tFrYgjv5@@-@pB|qi5E&Kqo-wSAv8*gzN zBjFa}U3z}?`yu<9&GYNMolw~JZ16hRN$94RPe@7Go2^`>GnpP_)FQtfO^t@3Lp!

j10_E1wk5$p#$(ov;US^kS5j# zweOxBU^oHr6aa`n1Q^Bu71p&)D%wA$ja=|5T(6tt31kMTVp?s`E&$RQz-}JUK5mZ7 zPiAOGFMn#N6EGy&to-==&Dcb}#q_qJl#82X*jS8f@387>Ph;^Uc{8RV5E>6+kLc9f zAV{EmmD|=^C3YFhHN$o0e+Sor+tFL5auIuy#8u(G4_B$(Qoha*8dGs$Sias?=MKhI z)vBShf5ENU|KMqsp!B;g`GGnbybbl?LfgZ`ix({YCQWFRlLI&BJ$up8KJn|wQ@X17 z(ByBYp3gr2Op1U6tb8RRSngOiRKoxO1!@2Q$Nx1E;VZY|AGrvIMql{}_C{Z+2o0JR zHmjUS@7X&1h228d+q~A+wcWg}zS!TRYNBzMq}$#NkNB6r84k}Aht)5Ab_$3$lByLN zCtAlfqIpehkHzj72=4c;PUi4+c?>2?&^hMM&y0GRU_vh*#`Nq;d0##u6P}0m>&W`;>Th-JbMeq z;)-j21acdpFQ?bvx(%~`YR&IDkSF>R>EE2bH=T(Yp0ZD3G^dmPbYlj@q91hrHvDv= zV9l=(oI$QtH~BROa?A~;Quqg?G0Y2b=(FXU?d`n$t!=~L8KAYne&zVvt^5dNy*sEwLs|n@T??c(1MmoNn zX|zjAoek)@ZK-9P5-h9f5*->A&Q2gHy>t&t+HV-5l@=ZhaU+YfnAmk~+m?Dd>oFlr z&Mtgy851=0`k*7o9`-x|;T0Mdb*}KJ0+{ukf50*}zYQ9wWh7#61Yk3nQM9FRyr(Yf z*r1&mbKo*IlwDQztdRZ6BKJ z$&k+N5X=fWw|9DT;?1lXeNIPwn&D+c zBZ7t^(51RTonm4GpdgCw$-X{}BuC`x;xzf>;L@NSt+j{3&EBpr&FjS%1m!3eBPz_t6+-OVVOQL|7hXNFW*0)UI$1a8;Li;^FV7IAC zTRWcWCZIQ^s)a1^+*cqYIwBaqgHjv*6Qw%DCw#HDOA7DdM&{c6sl=7%O@TYpCkamL2d$FWx)cc*i^KvY zN7hY#5l--Bd<~kczi@BE@Tqe1#_pG_e#z`WyLNof)Shp4!Q;=#>9+jCEe+a8DVh`! 
z`xQlvF;1FG(_XlIUN~GqFM7_R~Ig zsjeCdxze>SHJSSN{OtWFT2*?pIbpRhO}=qx=(>f&U>Ft-7Z2%ZsMUkNOI>-2h58F* zUb&&T$Cyd&N}X~+!;cWlp=5ubF)80+eh)+G8`*vrZhLQk2WcRe{ZX+;Du0diJ#?;j z#7fo!1x!#3oY5^vHj%wj7gpuWskNz*ys#9W@>0M(FMejMxzbvHoTmybY5=?@2$6TN z5H5X=UK=qI_;+VSlkfDB)mvcg2LqaSP*Qm52Eb!yb)T`)^I>@6RJO?e8eXo*c0Q~k8zTP2Ko@IuTik0Srbc%^Q!84u&^Z|gL( z_0EQ1Qnp4k|JC!N?A7zK?DG32JlvM71Z*#LNz(6ZQ|1Bo7=uveTEu?McHXIn)DAYAwuRR1IUHWcohwH@-px!@JpNNUgb7ua*1Az2zqb4Fk!(+igMGd zhxiOrM6aT!*hVL{5Me7$qlE@+$2CK0T%A7lCqFg()|ydb256<7*(|(DG+^!{_viCnHe4HCS?Se#h=~>QZA+&5!6e-cjf#+0O)Cz)|}Z zus_p$1akn}AQ}LF6zCaX=VwLm|68tYh6MW&QhZvncBsA6L8mF@SJB?VA8S}$F25M zcBRSQT~&S~3@5}|Ya6xHM)kzuVK2YeZDmJG;zf0^Noa-%K4YPCL2q3zTweKGW=7>l>C6b&_ z+W;Kg&mvM0^~g({@2V@S4>)y_H1Oe1V+vjW5+yWkq!=oPo=lPVRoH>oDZ}CID1+wKbM8+tQn4eN0gY+{2tvXq;+xf{u$9NS$q?)Y$QgH4B-7Dt&{uN-fA1#3>qh$I zd}(!3&)v|W5_l2_B9_7*Z)t-oZOvnx$02e4)onJb$05AQ1dN0%1|IulNRIaT%INmR z^i}K@{?+*P2`ca_!_~MA35#&6QkB{Ux<9c%EhKlrbIozwkE`oyD8!)7CQ1q?j&bwd z1j>oBIn}b+g9DJ15)>w()@_iznjuAuQ0sY6wm9cjrKRYD2d2AW$5Q0p_E^|#i1Gk z6XqP!-KFJxH0An$q-Kim;C}2jbc`o|_CJ{kI8 z!u%?r|MqV`#M1FG?AlrrhoHNtIMr~coIm$S(+j2OHS?iXi1L?UP2*Ynt-m4=cb;VC z0nKvofq*kCepf6(tGI)#O*C!q@+Iq&|FY#S7&C{KmM0%K2g@#%35)8Py`wBJ_ai?+ zJ}-$Z^Zm#D`W)tRF>Z#BLN%%Fum|K_nY4M7%-rlY(DOA?N`(jXFApVWbSUjg@31kt z9)n9E@C~rlv|ddDxW22~(p~4WgU&9c25nqBwd+|IOBX&y6wzHQj`#!=eQ%x9pFsaw z1>oeRNc+!AH0eSC0E{F60QdiO6=0?3XsTmuWACD8Z}^p){Lh6zrJ9Y+YCEEj&DJxV z;ETT4A-l|N-6ZFaup~iOoYk7FVlZ&XL_xJRLpjoY&VicCPA>5}3Xyy*OQESfSC_6= zfooUJ^}nSfvs}N^7cH-BP7z%j5V|M|d-v(eF|vpcv7|}Jh3e-p2?>6?L3|w+ICuqB z63wKv_kxuLxk2KfUV7EQMaPN^~+D~K~4U};KLG?ucWe1l+CL>5Jxo*P zNU6C@sy`bYVo|+QIo3@-YaZu&XV&&fET=G3x+%!GjSLqmJms@~YvywRBD41%?ZHt+-C3fY)uCHJvhCGGZ z1ioCwWE;sw>>y%U#pWz}IH(@=gtjeK2j|XMnwsf-$)bvAI#LH!^H9nx@H8*6N zd#_Mc_zm;$A^3VQyP-9;iuTu(uzG^qlzz`f$3I~V1Tr1Ry};jECNCUWfO~6%*Q$TS z?k6y%<+21iXcI(&af)A<++M<5UYiMJ8zqP85_k z7y}9e1czs&ZeI3wsMMXd4!!rsi!mnOEnC$A9D%mz{G636WYRz_v9Sv%9dBteB%ypNBeXLDt~-u9-sLEKv8% zULd=)2ckdEDzZIA2~zQ14bWAuOrK;r`JMnh+y;lEY)=%BFp}b~b#%(Ev^C&*=K=cS 
zvG6e;WQS&IT!P6LBIvmQa$3A^*4Cm03QC)&tuH%yviiMcKikH8ZYs_&T0kwi>9F|p znEk_TgG{;LOe&!vBpEq~_LAZvajAf^*XwctK>Z}Ty66)vtiV-sCN812KrobwY*sMv zR-hmJl;22_JZvpwkjg9si=iKqM~P+rG?t8#{+#|%OFcKA6R&9>+TAQcZ%i>8CQGJ? z%(Mb*LVz9G&&HuBt6jL|E8DLlfkAH~8*DFH-=&)i3Y{(q)_+7$uN#fVX{MhcyoEd) zcdE-^dZlPdir6-G_j`bX4Z|==&zu>0_T%%Ehso>S2DKq3zqni8(!eW;p1Yf-7w4 z=*?s~pQQ6QF$!jt&w1mU3d28_ti-6UZ6FX|`$-K?dmXapiWC*vgl)Yv0$dz<*-Lk* zla?n#jTRb}7M())6Wdlrd?aW%G$o&SNL-oY*oLbOm48gVySy=mdh)B5(X0j(GFaeo(&hl zznoZBTieOFm4A);9J|}o`sDaHTaaeALbo(8*zS7~^W?FtT6OM?c{YcEVfrS%*A8%* zXj~wYf+l``s(cvQ%64!(=DuM;%vNVTpc8G~-6^SsXPB=D0?C5l1GXAW|ih`N*} zF|j53xOdp$%w1U4^o>quT${R>D5RN&uLXm0EjM_0d5~`A+cgqcUw2p8 zCyR;>S5c{?3`(DwkNRDl(d#uACJ%l0^6OKAh7Pzm6_kzBsss6iZcX-+_ubk-_4fRw zk+*nHOE7RgJB3w`qmo3~}<4Wt)CteP|q2~NPA^c#NN$_IuedB(vgGY;W^P=q4qirKW zA#eGy@$*RW#7m@PMuku+;P-LgCi)UmNpgA5aR=vbO=t7o@4q_V=bA7(jUn2(SZIJG zY!|L0y*M58*&Xy*3%peaG1J$pt#bO7AQ;FG=*b7@`+vQ*9(7b(fVPZV?uNwcoWeV1 z(VE6ygnfh-mUusPgnieFfc7i6B3vfrmbBf$h05GkE>MUg1WcHft5xqHa(ydTz*8KJ z(}kX2cq5N?gc`p)O2y^HNDjEl!lOz-b7r2^8M;%Uwjx^c_n30s;dUi=^Xr6m)c=ku z;lqgY;@`?)MAt!W(mURcakYdEYiAzSjJZyqG%HJtTYL(s1iwUxAw-*D6Bhy>Q;4(2 zil8DubR6mzPHgl-B1=h7<%jSXm6+JzHv}_Y1F2u<>n7Fl0Ys5(deewE`J3(sHr& zY9V37u2g_NH%qDco@rGnnjU}wvng~ccjE@lZZ)4Z2aD0#2vDL79yCiHur`HuZ7gqW zZ+oXN8-f^KOQwN$A}1_x(h|t8tN`p#Ws{S|wPP0I>Gka;pXcN~&Ga`D#PuDlAf1k0xQ z9cpO}OL09nT|y~lnVR2a5g}xwq5+gAiN6UD<5}*WQ~;Jh%LU<(*Uu6|racck&pCU} zq=|E$(Rs8L%vI)h;kc#%wCOPN@;G{51?#AI{t-|uTp909;TuAiOmz9Cs|NFdE-L{9 zi~{(diEm%ix3BUSaqX}2cN*Nk5dV${_jk*`5Jdk#{Ec+>MOph7?%xsP{)H3&FSviw zT3(BE6P$mBCB^0N`K%0oeW*l#b5d-cZND z$xu(n(8$@$z(~i~$=blt%*NV*)^uFU!(n!)?vqc+GlU~A*`8f`2Q<752BxX~4|Nqb z5*1X9hSO;&ibNe)-QGMg$y)tk{pRuXk&%Tv9uiz;FOo;A&wFZiwu%IY=iANxsMkw~ z>H>95Zwt#P&Z_+yw$FjOHrj+dA9O+h*fKdezIKdQX3(!LN3r-yWx34w_Y-)b6~dU~ z&tVTX?H9419AP=JLD+4JE{1rkdMBI$JN;D>L@{G%&fG2`BvGNb8*xe4(pG$T zy&Sc})avU%c%;(kD3oFCcjdTki5rB@@q$d7#j$;1#>+FX3fPIH+4#*+n1wJYXWI7B z>pWzGcIN=Vd+ZN~Y4OgNW1P*J;YyXd#Lo>o2JD~9Rgxg0S 
zZAl*=5220hoX+eiI-d;L_kpe~X?wG|)`Z#XyY<+1ye{ldUOml2lu%5l#Fq5E*;Ob{ z82Q1wPjZ#nTc(=By|=Rye{URl2f8Zp3z^x`;n+yJ;*_m4HuS^G)S@k|5yuHziqmBh z2;fOJuF&}IL->>gq=feDNEu3;^UC^rxc#iLdRcg%8*0bq`rE0;pM&?N`UTOPuj~GI zMjhFnnc0#RZ1@wYA1MnOqbqKr!*5ecxuCMi5S1~%Wx9d7;&p@zrY8j0qRmuw{-pZ8 zfw)bU9Md}9Op6SHNFl(Dl6=$sO!0}Q_;vwD@g3K7V`5^PP{~)K!FRJbjVTu-lKNZ? zXTPG}w58C1#!dl)#(Crx3RS$Qck3Bz^}8r6;4!JJRuhc2aDaQ??i_B2JSC8Jp4`uo zHDG|kxs+n4pDpeZA>|+y<>n`v{|-Q90^IUR0}YkK;hjFWssK8mO4lKl+@?%I-b{dn z^z~3=Zb`{Ar$qPM=hH0?jy6-yWVu3v$Go`hldoz`_ov&IjvXE+svxqCLpW`N?a~39qME4YPc|H&C)Y9p3w1|@3|oBXai0aV4&o)u zFO=;qYU*rqCydahUi=@{dhnNqXNmbiJTS;Ucy%o>&DFmWDUyEJ5rO?)>cFhGvd1POX47b)~MwF240t@G1rwkq+P2O20>bIF_r!)(+-Q~eR;_a{H<*;xdBTVlnom$pWjM> zY}6UY;vyK0By>JDuBxR}DBKBGu%2$E&iI!V1zn4Tyvr||*g7*P(3R#pLYT~(!_#q$ z??(}k34oCF?mQ~nVyMhtz&^8}AH{&PCpLB7gS+rSS-0^?%_g&MSHqtJ2P$&Sr#l1WT39-AWa+8Q zaPOs~LRMz!5>Rxio*;AEG$PGn@JA`cDknPyohjQL0@=tL0))V3fFk8Uzz>8iVA&!_ zcwl~4m>)?yT#$Prx5kx@#kcLwYadlb3qWGK>_jpp{nBh zb3Dz_r+eZW@6wJmS7bRl!2ZzUS4Ia9SpV-#S|xb*0w2_dR`D|8J3SfB8--f?NG11% zNN*YY-~~b02HAeKny_@Uyl@3wg(N9ZY%wc%Oe<&K}yM8a1Bd`@vnQ8{}gLuMk< zzEa7Vxj$TwJkEGO*7u<_bNCSW?PtwLI$UKn5v}Wd?{*A@E5x(JE#-tzsq_d#*jUOW zp?)`oaRsdFAO)A?G!&O-g`f9RNR-OLRR9B7iOQ<-U7LeRdG4uE- z_|i*fL(00T4k?{pBvDoO>5HU4rBGGhNrD`#ge|yQHyFgD#QK-rO(8EvdJz2<2&R69*t;O-@U#IFkdHpdy=o_hTQgOwzb~WHzKGXwbc7*bBazgK^|V z<4m(^@JkmHi~K%Sj=hCVQK(8zZZvtWIt@?GpN~0@r?S>^JC9IQa*O{kk6Y~YR7xU0 zZbEZAhtW`S<9s(+b5idxUeiT=P_L4IC+l;!)oIij`BiQa&sYXsj|tZy1XE9 zxtCJhU(ebR>KKeU?Ptn<;_eFbqw(x7d6xQSS^J%L)J9(yM)O+CJ4gzM z^C3bn_SKgKMH16Vi~f+v1;~OT)gvy$7n+oBNhMN$nB~V;3>=9IKzixOuBtuIl%Nvc zVEpW-XI4uMZV9^>CR$Pc>3VM{70w^2@g9@Klfh?nt6U4LDu%7_m-%@B7rzlisawLE zdN+D@R&~Jbt(hO>w3IJ6FQ29|f-K*g3MzuR8~GgAU+LMA9WAlO}V_T^FW$y}Um^{QnRl*(cjA*Q!=!v+y z$)>re>Pl1kmhc55g25iO7KeA~?svX9P`2BoP7EfU<6({%ma76)_?02Km7kGoh2;A& zqJvURD_h3H;yRIPW$`1wa&lkzWsNGz2+>u@6HVxxmy5~cxMn1+d5Y484mMbjwI>}` zddzy599bsOz>(V8S_955s*+*on&{@GL7iRwNB7AP>omZ6-b{I&>1*PJVPTXi3gvo7 zP->>UyrQI^5_JN=2g}M#`&3b*u11<#eJ&kEF3G)*@(_M&4a8_(-#Xu8dk^F~{kbI1 
zi<%FpbDjfTiB9icagxma777Kg4q=RtC@`^1v98J`Z#t!)qQ}xB3-bFOxE_{y7GTxU zYzgvS47B2|r{+jn^c@*q{hI>0psr=FF6e;B^edF z14Bk$I0#JtuUfL2+ln=Q$3wAYS=RO5)ZslC4J*5>EKBO<`iOCo-j6}agUZ_O3F!-h z*7^)o!xT)4^ihj54#ec4{O4NWk=k#9Kp;L0z)uh#J^qn7o1+KCQgC`=TmVx(co9cu`CE!?zI+N3sgmI zQ%hM)oRo8MM00UCC#r}is&My$h$q9~C?cWCOw+u64Nxi#Q&iOwjVC+V18 z#IajELYL=D+O3rBrBo8~bu~7c`U!M;cPxgN-RZN**jz(v0V^vCgWJFJRkSPkd%`gz21zZx7o(XbaX?9Yre~AutM3d@bHHmNI;?Z3@jO z&j7qd`O$sFlC0_iT5*s9Kyxh*3YI0R@k2l8rJ%#nzI^}_v^H6ERuzX*aFUWXtGfG4deBEf$Y^o zh&#E}rCu!d>1dft>s|R(o(U|N`>{?3+=bA&H>Y}aNgEX}Y+IB;D+ogbGnMP>ZgWE8 zBYkw#eGqgM?Yes6_f*IY?FUfM+D{Jc zupTS|YFWkL?aJegw`?5TmN8MnS_-FJuq%QTDgs7PNaV<#?etU@id<=1yDai*1y{qM z_?N5hn2B($GZ_bq$OM>n;RT+s62dTu+rsFBTj1 zdq~#Hd}N+T_$!znTjAn2q7uvRq4&6k2gNI1a+g&rE8X-?$;0%ov>WEyu8y|}#Zvpz zUVLr^=LdUuNcy|1MIxh}BBB|BdXtSTa3@VILll#_ah%4#PWT3T^6?eCWBUc+2c9Z5 zMrZ;R?LO9p?(sW~IAUew|+rJJ(`#rv7Bj8aMdhnFJn4c?Nvap28)woBG z-NoW8<)_95!(;_hZ;fWGjz)HlxZt0KB3JCYjAc@pDw9NyJNO&fS78}9+Tg@}YYt)d zz)6WeY$6rN-XQ}iYkG%W4WgWpr@RhM*@#nleJm?kGD9^Pt+4Q6^aWF16!0EXY^j17o1-G79!|`g7Wea?+70s)=$cbF0OEuLr+`B9#PU?eH0tarqC>%HkcezcJ&Z%=*Bf~Sl&}Ny3U%fI$ zGZT8-Cl%$gm8l|;_)CtXI+)@X*Y!^Y`PdqLB`$jbTQvVf1$jk;k$aqJCkS2YeV|i~ zq=`xQQQKYG!~{Xf=^|ZT^77*F9E;W)4 zng6i*x_`>Fi_3mEk7Q(>JORf&TqR`%EDg(0`~WBk#@KD+|Enw3$gv@uG`nx>Y{7NN1mXqv$}_rXS))qJy1^uWv~v@vz_{QJ z$%vZ1Ub!#FVx+%e@EK_i^*@2#fSgUC;?W~!9J^MPVT3>P0)`mv{C3W1Sf86E*Z7xyWAUk6{oSo=% z*`J8#v5zN(n6rHx84=JY-NZH2e;CMVpu6gRI+q7^o=dP3`iuFJPTX;11)(yI{gYSy zv?5T><`I!0>+ERw^U*S70!y3a<#I6kW3cz(e5y$%`MXJjrRGYVgR)#4%RmMjfDP#rTGtDt6{+)a&7ckyQw6+{x*Nj9D;3 z4U&yCDfftmzxc^!TTqoo4K;35>C&iKh7CDg+RW1|HE2&shwhltlnF!S$zU4eU&oHj0xcTTLHzhhhwnTzE~V zUJ}1x`+mA|l{%a*Sw18|lVCM!t*R$wkg zG1yj??~<$BZSBGmiU~(7R(LK&(Otb!fxn(FU8C-Sf$s3GM}|fUGIVEg6N%1|_~hi}Waso`eR;+Nz*^pf%CdYw z@x5bvyNg-A9H)OsbUmwVI7{NCB+}{Fb^7#_2&@QG!px{t4okliKR(Xmf(Ox*Zq zRin~o%Dqah(Ak24JS(|LJ-qO7?{{Bf$)nYXOPO6Kxa9M*SW& zCR*%(+gF&uG-IQ*(e}E3CqN-N(0} zW|R2tY&ED})TE5m<`AJ!d9Bj5?AO5OQ5Fp12uvqf&NcM0gHbJ0qO1N#mdn`E0f8KO 
zpcSxyyps%VV1_fkgDuux)thm>pXB7Nr7U3np!BVl!0}R!rA3fSSCH zN1+?N`t7y9+oQru_0{a;;l}x(t1eo{H~a0!(DPt9>U*6SrfN%Zvb;y5+;hp#UW&?f z5liy1Js?!(qV8vHk1^GfPK9H{vhCKrUkMZ8QOLql_z6M@`<>c-pWZ=R+*Z#2V^#hZ zkAIkaMC+6{79bBX^5)XkgOXcGaf!%!aO+(AFcnFe(^PO1V|QIiH$h#5zuOa2>)oZb zJ%-egqDnim%CIa!{dnbE>j}>)sehfV%a;Tbs4VEZ5jo`iR9=B7jOCYDAc$i4^ zTb#p{D<<5Vl_T&{P#q}J6%4csB%F?WBdU5sen2EtqrJYZ8tQo3SsHpb$Vq>+L$ z&ccH90Xm=4E)Q!{w~1D*lv7S~`PiyCqCr5QEoe$}d2!;R;TruWrLlXoAxFcz9PJ9V zQKd`KLZPc@{o?&((AgGVr22Q460AN!5-0P@yRlTLfY#+Q-; zonvG~C|a>)NKOfk!f2#_e3{;dZElyfIdw8J)!xn_J~mgZL1V2lUvb)ngQg0r0r|<$nPA%AwiR1k>W=0Pm@$I9D<;_Rr z#9EGkH#pXUyALu5xYLrmUJpwyNJAfDc~7tdEJQI4=krq@49M->dwofAgUHAG<;keb zY`uxhBc3ImW_Nu4Q-3lG2fwU_1&Tr%)$Pl&4ac_C3ytlHu(L$rVc=7@9y2zG1}GWs z;-58bR^wjS)XX1C8E#xi3Sj&&KbG{hY6rjKPnGNe^j zmO7vM%A#>+rTaJu$zw6#nN>w>fia;`;CEW-p;0k3Gctc>+L&(i-+lc(=vukPR{P~( z$5S8NxM=ZLR}aohJuPDfSLL3UE)rP`W)_`Gphc1+%~bysge1p5t21k*oao&)*tXKP zNLiMrDc4In$*}WR4pg~ zE8LDpZPI&r(E#e1$C&qvnsrwOoGrf&GhzoNTsSn=VQ=6g9npfs#KY;14j8VxT9j9L zZ#i!R=HO|f{jAv}%)&AjitPI53juUrCl3Aa``-Ml5$$x#wB<$-GmWSGE{1LpAY!9% zLV;vj=0R1sI-A?bUP8oUI;2qXz!uCqlthj;3ab+9s_m5;8k34R`7Ba-L?sFx!Q~90 zu`opiuEPfX*C|3rD!5sVz92B<5Woyq$kWFFZ|XiZROw)Q@qpeuf4O~6NYk7$Rr%Lt zGxTT2@iIbU)bs~MYxVmA*}@WndgGI^KNmI7(ATyjP&I!PWeEf*$^Tp`mpFOwWdiy<(gm%!%Ta%JU8h9I_2>~C zhc#<1&LFfoNp7EJ>K~Q5W|h3Q)d<(-k7xmFSUX$tZDbiGKjpeD`~b4w3uurVZ*dzV z;TGaudVcl$A^V!m^Xt8xP}uft@H*H@=%$xXNJ-h7tz4xunI2@+BEKC?jfSB^JGuH` zFdcb1Ku>I$SG$3ZawTQ{Y}WaAI*ZL^tPdti<@QGSd44$u7*x#dSqH&j`+ujY2_?N#K4$l*Z)h~T^3Wzt7sudb1 zTE{h_c};AO#qJmg?)R=v=J0iS3?@s^Ip)vLjCz@1LN6Z1^z2IH2-x+^K3Y6P4@V?| z+4aBy(Dvd|!q~-^+gx%T2R3@(+jfVA3n>$s;_6srn%fM@`@$G6;8kO7Cw6?@EXdWa zfkyQY1i2db#h?S1#&HnCO#5*T`d72&Kx`YqidkecA9VgU{B)vV z&94xgL9SId`85Y}%nhbe_y?ph%nNbov*pGnqb*N#5##{o*Tn%QN^#xPZk(YX8$Ll9 zC47<{0Z}*5{@t`NSOTh*k!?Mr<}YeD^}&o*zMpESdt#ud4nnsEkU3?KejaJmaghXY zy-#8Dcb=fLL~Es z#~by6>2bzW_<_iER+%VTrQ6r99k^_088X%An;}dQ%MQ+L8Q%3G3!z!?hqhk4x|`?y z<9FVF01VjDXnad|>9FK{OI>Hfrri}jo8uMFnNlY5kKH%VY|Ap;H@-)uwre%b*YE5fD!H&3Zfa;U9u%pCpExrADZjQ 
zkk90l9~&|Ri~==tY2Ykvp0}{*_NMWq#IdC-k5XJywXPys27eiZ7q9MI4l5U^A3mq?U zv*i^&dxL*ciZRj$DI`2~P$(1$RMVxs{i=vZQz?)`nq^lvU9z7l-+6#1W(7YE1k(Af z8xH?y8pKz_PjL-8bB|WDl{=_tM&46UOa*F>>LXYA8lnt(^ZMOrr{HCn5`M8u+RaU7 z119S*Vo>{oQXBpgr8>kXe6hDn3h&`Y=Gy(K#FghwfjiSD2~O(=t&-Tf6bTrM!~!Kp z)=hp9PVi-X4VtXKaBsu#sdDqi?w727$?QP8c6`s&o^N)+?N=F;P*fB$-`ahVJot! zt{Mut(zP!&nfmwq?ENQNReG~IVYM(#zHw;ix`o4F7#0o}59w&A)q}rFU3rRy`U_-U zxuLknm`UwQopM3Lj}XhDWPhJADc@m!4@2o2*?t#pdvAXSX&{&VQL#rVe~t7#bgp;A zO4b7fOi&D*(Je?ek-btER^`m8wW*Q3uoRy1QoubgerBw>(prCol|V z&W2!8wnjAn)$^k4)$_9K^7|${+?K2aY%g_5((i0j<^lE?gHYyL#D2|o-n*v3&m$(i zZ}oO7B5%7QOm39yhpm{t6f?ZU2tq3+kolsok$z}yR_xl?VT<5?Cq4xNCrz6#ylzB! zt6|6_jv4m;f!03J$*yE{J-z~7;OLg1&u%$;Q>S5eVM*#dwK7_Y$ug|&v853sH)+D~ zNKQ@4#UBWxZ;aO=Lgpp|$c4^$6%BUsGVmzyOPlFla?`7a z_zY7-ucD{eMklopVJl9fg$8WLHA8A#oj&#_KQ;W;no(i~Xr-OmEWAoIVD2OL>nKi3 ztAy9Ab|X1SD3OdMIYEKOG+NdtBT&9ISaNZG$L@6MQe#lfkLWkvQRpVw&jen;QTr9J zKht~!a{${Q8WF{~V^;eDc zd!Obf0`7L|AyVEZt4*r4KtFpL>sV4nrj%800lJE)Vq))=?e~`@1!Yv}xqyVnt@c!Q zrODo1RemE3FEb+yo-cVfwcP2I<%P`M+7E9F#-F3NM%NOo&Lb}l0IoE(g69z$Y|$$< z57~X`lKfL}L1|FcNg3uoQ0S{FHm5+pOY9$<>n&alvvc4GAi9sW?;i%{h)6yqlAKW6 z036)UB2p3c$V;5>sw=AxICYXV@ZnEmC55EqaE_TfI_JLa&#EI>ubEr@=z{=C213`y zDZ>1Y-gNU$cpTl?F+T#-`)j^8t8YC`IKdp&gT72h=#9g{KazaT=+;kE3R}>w?l~bF zsZ6;NB{XfM7%GRJOp*6h*n!t6!{O~HgXY$A?oTjMxWztj1oxJ$ph0pxl9rkznwN%) z0b7*$&F_MtZM^jXjcNl3Lc%BFo7JbVmB+Zr5bOrX8Fy7A)6s3vS91Y>?y2C38R^=>&*1l&gv2oUZ&5d2m%HtoHOoqE0WiWJ`z^VSrPvbs2lrkXz2V}!==qzDgOytO)do=R_=bRi5FUQwGmHlUL ze$#mETcLsc2|7d}Q5(OoQ#_yiPvZ=KVhK>u0= z;N+!9`_D@>=|TYjj3fX6_y2VjV5R41s$*gO;C34Xgld>s}zcm-7w z&7`#Vf|UiiLG&KV9EMd1kBGDtLLyVEA-52#_ZJdCk{CcBl`Pe)C!_?;Th88-q>@G8 z=XV>ZXUQW$dB(f`(JTrO3u^{!Vho7{Qjz|Y28tIXTC-MFo&uH{;09x(iwKO1Fe@g9EQTCn@;^(LaBdgvYa~FhCUgs>7eIiXwF=ChJ4>{2H3FR{)gcf1$?dyA1qp_W z7atYs8rhW=O}7b7sWKcZh%+5v1rz~|E|8puCg?y+B1b(x0R%~RYxz>zKP4MIOjG7a zskuz5KN}rlQN2?+)=fWa9_M>!*7j+r=l8u7F@hPDE0O_4t&f5EzGbB!^~#LEn-Mn| zF4-_2P%WWiD{^C8@;zDHG(Fx|=@?{}TYy=!k~rZgm}<*yi 
zzFfs*8_7oOAYxg?<}7+Rs2=o1UsStee)h~r=!gEcq{`;-nIHS?<(`ZUCmzz{s^>Os ziecjq=1Vu;YfaUM;>U!PS~f2dmt4-Q%Ar1w2W8*3bh8Z9^g~+2%Vn)KKA__@H)NZ8 zuTWL^4fF6J_ieH%AUcy{nn+atbC)C13xX?Zwu1T%UUv91={Uo}&=o2lhz*Tc5E}^zSFqDdHRxt2Z zpdb8{-$;@?Y%OGv$}9wnp&yb*iDmvYmW-1Aoc>WuJvW~duW29J-7G@({rD#ct*fw_edw_xs!!SwDoEgJ&h*Zmlps!MVOBuGPx&3PU`T1nMt<%TOh}-I( z=vn9E-Qb6|VSb*W?c2qPIX!)5IOv*!D{SfL z&15;Br1Lj13TBnhdE=W3!#|g-#Hg-qAP`^sNexeX9kS<&6cyTpZM`%CTpW4XOLwS~ zmM24v78;cnokI8%+g3$`X;{D4se-h zTp*HyCVqaZd>Gowc5poC?PGnf6Ey5dwXP%?gKnksyw2_<@RLp@ib2U|4sw}@x|Aj{ zu_gMrci<}a7D}eJPCl>g?T=lp5u1|7KSEJn1Wol^|BP6)g1=XuSkT;PJ**<(UZ?opf94}JIY>r;Y;4!AfKl#SD>1Nnq*P4<)b-P%F*_WY%h zw|GzE-ijyjt6$qJUirCdELM|VACCo3UgNdx`dK3^SJHiUFwLM4T{CjRd(FXkPC_x& z{hy9Zopk2ar?{1QTv(Jrd0M7=&#kFr!)Si8AQdn!J(4#5%0pWYMPa zs5>6%pMYK>Tu=~~E!3T0Oqi9cRqr5jeJfYMQyh)c zg`Qt{Bae238oxVA#pT6F4!Fv~qe?+@W}ejKRHB3knIm~!3Wb|rW7>x6dH|Bfo* z!-(_Z-^yV`*FkO4JKm0QwS*08XCBpzxlW%nD@%-9d3M4Mp~7Xlwsh_lCv zpdvuz``x7-XW?>KI+4kyZ`AFH?#ePFL04x4v-g^6#AWtff>k|6-5&Cu?JYH~uBl5W z3Q~RDeD=OR4#uX;mqj9)JO}DRe4#;|9%cHJ>#Hi_zK$P@)VTG)o?^HidR=EN^UY zd#5iOf*4*)rh#`NCoFK%63DNt0PIlYPI#s|Q(bnz+0K|j(kYEMd+8YIBJIGzum_&k{qXJr6t2IeX5e ziF2OOd9)SGRpxi$xTXNK=`iy0IC@_N>!^7C5l}5$8ShTv8$y>%bor*M2J?X~D**(I z0{EYaZ(q~5uksgh?XU888r;7S|BeXvcgw#JME^kijdb=!S^F36-x1^fg%kfTxPQ{* z{%7=GM7h70zoNQ+$zSjJ_o#nI|BE*FPcQ$TIQN$qAOOKvUG*jZlb8PwjqYDR^xtoa ze|i2#5H`a9V~~HhMgE_|{JUBAzryg){2#;o4|8x?2{7=#yg__jBwq~4N7lcz{|9cj Bw2%M* diff --git a/hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip b/hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip deleted file mode 100644 index 16d66a20621fce2101711a0cd3b9360830387e3f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14641 zcma)j1C(XWlJzaC%eL8NySln;+qP}nw%KKLnO(MR+jjl+JT3(BE6P$mBCB^0N`K%0oeW*l#b5d-cZND z$xu(n(8$@$z(~i~$=blt%*NV*)^uFU!(n!)?vqc+GlU~A*`8f`2Q<752BxX~4|Nqb z5*1X9hSO;&ibNe)-QGMg$y)tk{pRuXk&%Tv9uiz;FOo;A&wFZiwu%IY=iANxsMkw~ z>H>95Zwt#P&Z_+yw$FjOHrj+dA9O+h*fKdezIKdQX3(!LN3r-yWx34w_Y-)b6~dU~ 
z&tVTX?H9419AP=JLD+4JE{1rkdMBI$JN;D>L@{G%&fG2`BvGNb8*xe4(pG$T zy&Sc})avU%c%;(kD3oFCcjdTki5rB@@q$d7#j$;1#>+FX3fPIH+4#*+n1wJYXWI7B z>pWzGcIN=Vd+ZN~Y4OgNW1P*J;YyXd#Lo>o2JD~9Rgxg0S zZAl*=5220hoX+eiI-d;L_kpe~X?wG|)`Z#XyY<+1ye{ldUOml2lu%5l#Fq5E*;Ob{ z82Q1wPjZ#nTc(=By|=Rye{URl2f8Zp3z^x`;n+yJ;*_m4HuS^G)S@k|5yuHziqmBh z2;fOJuF&}IL->>gq=feDNEu3;^UC^rxc#iLdRcg%8*0bq`rE0;pM&?N`UTOPuj~GI zMjhFnnc0#RZ1@wYA1MnOqbqKr!*5ecxuCMi5S1~%Wx9d7;&p@zrY8j0qRmuw{-pZ8 zfw)bU9Md}9Op6SHNFl(Dl6=$sO!0}Q_;vwD@g3K7V`5^PP{~)K!FRJbjVTu-lKNZ? zXTPG}w58C1#!dl)#(Crx3RS$Qck3Bz^}8r6;4!JJRuhc2aDaQ??i_B2JSC8Jp4`uo zHDG|kxs+n4pDpeZA>|+y<>n`v{|-Q90^IUR0}YkK;hjFWssK8mO4lKl+@?%I-b{dn z^z~3=Zb`{Ar$qPM=hH0?jy6-yWVu3v$Go`hldoz`_ov&IjvXE+svxqCLpW`N?a~39qME4YPc|H&C)Y9p3w1|@3|oBXai0aV4&o)u zFO=;qYU*rqCydahUi=@{dhnNqXNmbiJTS;Ucy%o>&DFmWDUyEJ5rO?)>cFhGvd1POX47b)~MwF240t@G1rwkq+P2O20>bIF_r!)(+-Q~eR;_a{H<*;xdBTVlnom$pWjM> zY}6UY;vyK0By>JDuBxR}DBKBGu%2$E&iI!V1zn4Tyvr||*g7*P(3R#pLYT~(!_#q$ z??(}k34oCF?mQ~nVyMhtz&^8}AH{&PCpLB7gS+rSS-0^?%_g&MSHqtJ2P$&Sr#l1WT39-AWa+8Q zaPOs~LRMz!5>Rxio*;AEG$PGn@JA`cDknPyohjQL0@=tL0))V3fFk8Uzz>8iVA&!_ zcwl~4m>)?yT#$Prx5kx@#kcLwYadlb3qWGK>_jpp{nBh zb3Dz_r+eZW@6wJmS7bRl!2ZzUS4Ia9SpV-#S|xb*0w2_dR`D|8J3SfB8--f?NG11% zNN*YY-~~b02HAeKny_@Uyl@3wg(N9ZY%wc%Oe<&K}yM8a1Bd`@vnQ8{}gLuMk< zzEa7Vxj$TwJkEGO*7u<_bNCSW?PtwLI$UKn5v}Wd?{*A@E5x(JE#-tzsq_d#*jUOW zp?)`oaRsdFAO)A?G!&O-g`f9RNR-OLRR9B7iOQ<-U7LeRdG4uE- z_|i*fL(00T4k?{pBvDoO>5HU4rBGGhNrD`#ge|yQHyFgD#QK-rO(8EvdJz2<2&R69*t;O-@U#IFkdHpdy=o_hTQgOwzb~WHzKGXwbc7*bBazgK^|V z<4m(^@JkmHi~K%Sj=hCVQK(8zZZvtWIt@?GpN~0@r?S>^JC9IQa*O{kk6Y~YR7xU0 zZbEZAhtW`S<9s(+b5idxUeiT=P_L4IC+l;!)oIij`BiQa&sYXsj|tZy1XE9 zxtCJhU(ebR>KKeU?Ptn<;_eFbqw(x7d6xQSS^J%L)J9(yM)O+CJ4gzM z^C3bn_SKgKMH16Vi~f+v1;~OT)gvy$7n+oBNhMN$nB~V;3>=9IKzixOuBtuIl%Nvc zVEpW-XI4uMZV9^>CR$Pc>3VM{70w^2@g9@Klfh?nt6U4LDu%7_m-%@B7rzlisawLE zdN+D@R&~Jbt(hO>w3IJ6FQ29|f-K*g3MzuR8~GgAU+LMA9WAlO}V_T^FW$y}Um^{QnRl*(cjA*Q!=!v+y z$)>re>Pl1kmhc55g25iO7KeA~?svX9P`2BoP7EfU<6({%ma76)_?02Km7kGoh2;A& 
zqJvURD_h3H;yRIPW$`1wa&lkzWsNGz2+>u@6HVxxmy5~cxMn1+d5Y484mMbjwI>}` zddzy599bsOz>(V8S_955s*+*on&{@GL7iRwNB7AP>omZ6-b{I&>1*PJVPTXi3gvo7 zP->>UyrQI^5_JN=2g}M#`&3b*u11<#eJ&kEF3G)*@(_M&4a8_(-#Xu8dk^F~{kbI1 zi<%FpbDjfTiB9icagxma777Kg4q=RtC@`^1v98J`Z#t!)qQ}xB3-bFOxE_{y7GTxU zYzgvS47B2|r{+jn^c@*q{hI>0psr=FF6e;B^edF z14Bk$I0#JtuUfL2+ln=Q$3wAYS=RO5)ZslC4J*5>EKBO<`iOCo-j6}agUZ_O3F!-h z*7^)o!xT)4^ihj54#ec4{O4NWk=k#9Kp;L0z)uh#J^qn7o1+KCQgC`=TmVx(co9cu`CE!?zI+N3sgmI zQ%hM)oRo8MM00UCC#r}is&My$h$q9~C?cWCOw+u64Nxi#Q&iOwjVC+V18 z#IajELYL=D+O3rBrBo8~bu~7c`U!M;cPxgN-RZN**jz(v0V^vCgWJFJRkSPkd%`gz21zZx7o(XbaX?9Yre~AutM3d@bHHmNI;?Z3@jO z&j7qd`O$sFlC0_iT5*s9Kyxh*3YI0R@k2l8rJ%#nzI^}_v^H6ERuzX*aFUWXtGfG4deBEf$Y^o zh&#E}rCu!d>1dft>s|R(o(U|N`>{?3+=bA&H>Y}aNgEX}Y+IB;D+ogbGnMP>ZgWE8 zBYkw#eGqgM?Yes6_f*IY?FUfM+D{Jc zupTS|YFWkL?aJegw`?5TmN8MnS_-FJuq%QTDgs7PNaV<#?etU@id<=1yDai*1y{qM z_?N5hn2B($GZ_bq$OM>n;RT+s62dTu+rsFBTj1 zdq~#Hd}N+T_$!znTjAn2q7uvRq4&6k2gNI1a+g&rE8X-?$;0%ov>WEyu8y|}#Zvpz zUVLr^=LdUuNcy|1MIxh}BBB|BdXtSTa3@VILll#_ah%4#PWT3T^6?eCWBUc+2c9Z5 zMrZ;R?LO9p?(sW~IAUew|+rJJ(`#rv7Bj8aMdhnFJn4c?Nvap28)woBG z-NoW8<)_95!(;_hZ;fWGjz)HlxZt0KB3JCYjAc@pDw9NyJNO&fS78}9+Tg@}YYt)d zz)6WeY$6rN-XQ}iYkG%W4WgWpr@RhM*@#nleJm?kGD9^Pt+4Q6^aWF16!0EXY^j17o1-G79!|`g7Wea?+70s)=$cbF0OEuLr+`B9#PU?eH0tarqC>%HkcezcJ&Z%=*Bf~Sl&}Ny3U%fI$ zGZT8-Cl%$gm8l|;_)CtXI+)@X*Y!^Y`PdqLB`$jbTQvVf1$jk;k$aqJCkS2YeV|i~ zq=`xQQQKYG!~{Xf=^|ZT^77*F9E;W)4 zng6i*x_`>Fi_3mEk7Q(>JORf&TqR`%EDg(0`~WBk#@KD+|Enw3$gv@uG`nx>Y{7NN1mXqv$}_rXS))qJy1^uWv~v@vz_{QJ z$%vZ1Ub!#FVx+%e@EK_i^*@2#fSgUC;?W~!9J^MPVT3>P0)`mv{C3W1Sf86E*Z7xyWAUk6{oSo=% z*`J8#v5zN(n6rHx84=JY-NZH2e;CMVpu6gRI+q7^o=dP3`iuFJPTX;11)(yI{gYSy zv?5T><`I!0>+ERw^U*S70!y3a<#I6kW3cz(e5y$%`MXJjrRGYVgR)#4%RmMjfDP#rTGtDt6{+)a&7ckyQw6+{x*Nj9D;3 z4U&yCDfftmzxc^!TTqoo4K;35>C&iKh7CDg+RW1|HE2&shwhltlnF!S$zU4eU&oHj0xcTTLHzhhhwnTzE~V zUJ}1x`+mA|l{%a*Sw18|lVCM!t*R$wkg zG1yj??~<$BZSBGmiU~(7R(LK&(Otb!fxn(FU8C-Sf$s3GM}|fUGIVEg6N%1|_~hi}Waso`eR;+Nz*^pf%CdYw 
z@x5bvyNg-A9H)OsbUmwVI7{NCB+}{Fb^7#_2&@QG!px{t4okliKR(Xmf(Ox*Zq zRin~o%Dqah(Ak24JS(|LJ-qO7?{{Bf$)nYXOPO6Kxa9M*SW& zCR*%(+gF&uG-IQ*(e}E3CqN-N(0} zW|R2tY&ED})TE5m<`AJ!d9Bj5?AO5OQ5Fp12uvqf&NcM0gHbJ0qO1N#mdn`E0f8KO zpcSxyyps%VV1_fkgDuux)thm>pXB7Nr7U3np!BVl!0}R!rA3fSSCH zN1+?N`t7y9+oQru_0{a;;l}x(t1eo{H~a0!(DPt9>U*6SrfN%Zvb;y5+;hp#UW&?f z5liy1Js?!(qV8vHk1^GfPK9H{vhCKrUkMZ8QOLql_z6M@`<>c-pWZ=R+*Z#2V^#hZ zkAIkaMC+6{79bBX^5)XkgOXcGaf!%!aO+(AFcnFe(^PO1V|QIiH$h#5zuOa2>)oZb zJ%-egqDnim%CIa!{dnbE>j}>)sehfV%a;Tbs4VEZ5jo`iR9=B7jOCYDAc$i4^ zTb#p{D<<5Vl_T&{P#q}J6%4csB%F?WBdU5sen2EtqrJYZ8tQo3SsHpb$Vq>+L$ z&ccH90Xm=4E)Q!{w~1D*lv7S~`PiyCqCr5QEoe$}d2!;R;TruWrLlXoAxFcz9PJ9V zQKd`KLZPc@{o?&((AgGVr22Q460AN!5-0P@yRlTLfY#+Q-; zonvG~C|a>)NKOfk!f2#_e3{;dZElyfIdw8J)!xn_J~mgZL1V2lUvb)ngQg0r0r|<$nPA%AwiR1k>W=0Pm@$I9D<;_Rr z#9EGkH#pXUyALu5xYLrmUJpwyNJAfDc~7tdEJQI4=krq@49M->dwofAgUHAG<;keb zY`uxhBc3ImW_Nu4Q-3lG2fwU_1&Tr%)$Pl&4ac_C3ytlHu(L$rVc=7@9y2zG1}GWs z;-58bR^wjS)XX1C8E#xi3Sj&&KbG{hY6rjKPnGNe^j zmO7vM%A#>+rTaJu$zw6#nN>w>fia;`;CEW-p;0k3Gctc>+L&(i-+lc(=vukPR{P~( z$5S8NxM=ZLR}aohJuPDfSLL3UE)rP`W)_`Gphc1+%~bysge1p5t21k*oao&)*tXKP zNLiMrDc4In$*}WR4pg~ zE8LDpZPI&r(E#e1$C&qvnsrwOoGrf&GhzoNTsSn=VQ=6g9npfs#KY;14j8VxT9j9L zZ#i!R=HO|f{jAv}%)&AjitPI53juUrCl3Aa``-Ml5$$x#wB<$-GmWSGE{1LpAY!9% zLV;vj=0R1sI-A?bUP8oUI;2qXz!uCqlthj;3ab+9s_m5;8k34R`7Ba-L?sFx!Q~90 zu`opiuEPfX*C|3rD!5sVz92B<5Woyq$kWFFZ|XiZROw)Q@qpeuf4O~6NYk7$Rr%Lt zGxTT2@iIbU)bs~MYxVmA*}@WndgGI^KNmI7(ATyjP&I!PWeEf*$^Tp`mpFOwWdiy<(gm%!%Ta%JU8h9I_2>~C zhc#<1&LFfoNp7EJ>K~Q5W|h3Q)d<(-k7xmFSUX$tZDbiGKjpeD`~b4w3uurVZ*dzV z;TGaudVcl$A^V!m^Xt8xP}uft@H*H@=%$xXNJ-h7tz4xunI2@+BEKC?jfSB^JGuH` zFdcb1Ku>I$SG$3ZawTQ{Y}WaAI*ZL^tPdti<@QGSd44$u7*x#dSqH&j`+ujY2_?N#K4$l*Z)h~T^3Wzt7sudb1 zTE{h_c};AO#qJmg?)R=v=J0iS3?@s^Ip)vLjCz@1LN6Z1^z2IH2-x+^K3Y6P4@V?| z+4aBy(Dvd|!q~-^+gx%T2R3@(+jfVA3n>$s;_6srn%fM@`@$G6;8kO7Cw6?@EXdWa zfkyQY1i2db#h?S1#&HnCO#5*T`d72&Kx`YqidkecA9VgU{B)vV z&94xgL9SId`85Y}%nhbe_y?ph%nNbov*pGnqb*N#5##{o*Tn%QN^#xPZk(YX8$Ll9 
zC47<{0Z}*5{@t`NSOTh*k!?Mr<}YeD^}&o*zMpESdt#ud4nnsEkU3?KejaJmaghXY zy-#8Dcb=fLL~Es z#~by6>2bzW_<_iER+%VTrQ6r99k^_088X%An;}dQ%MQ+L8Q%3G3!z!?hqhk4x|`?y z<9FVF01VjDXnad|>9FK{OI>Hfrri}jo8uMFnNlY5kKH%VY|Ap;H@-)uwre%b*YE5fD!H&3Zfa;U9u%pCpExrADZjQ zkk90l9~&|Ri~==tY2Ykvp0}{*_NMWq#IdC-k5XJywXPys27eiZ7q9MI4l5U^A3mq?U zv*i^&dxL*ciZRj$DI`2~P$(1$RMVxs{i=vZQz?)`nq^lvU9z7l-+6#1W(7YE1k(Af z8xH?y8pKz_PjL-8bB|WDl{=_tM&46UOa*F>>LXYA8lnt(^ZMOrr{HCn5`M8u+RaU7 z119S*Vo>{oQXBpgr8>kXe6hDn3h&`Y=Gy(K#FghwfjiSD2~O(=t&-Tf6bTrM!~!Kp z)=hp9PVi-X4VtXKaBsu#sdDqi?w727$?QP8c6`s&o^N)+?N=F;P*fB$-`ahVJot! zt{Mut(zP!&nfmwq?ENQNReG~IVYM(#zHw;ix`o4F7#0o}59w&A)q}rFU3rRy`U_-U zxuLknm`UwQopM3Lj}XhDWPhJADc@m!4@2o2*?t#pdvAXSX&{&VQL#rVe~t7#bgp;A zO4b7fOi&D*(Je?ek-btER^`m8wW*Q3uoRy1QoubgerBw>(prCol|V z&W2!8wnjAn)$^k4)$_9K^7|${+?K2aY%g_5((i0j<^lE?gHYyL#D2|o-n*v3&m$(i zZ}oO7B5%7QOm39yhpm{t6f?ZU2tq3+kolsok$z}yR_xl?VT<5?Cq4xNCrz6#ylzB! zt6|6_jv4m;f!03J$*yE{J-z~7;OLg1&u%$;Q>S5eVM*#dwK7_Y$ug|&v853sH)+D~ zNKQ@4#UBWxZ;aO=Lgpp|$c4^$6%BUsGVmzyOPlFla?`7a z_zY7-ucD{eMklopVJl9fg$8WLHA8A#oj&#_KQ;W;no(i~Xr-OmEWAoIVD2OL>nKi3 ztAy9Ab|X1SD3OdMIYEKOG+NdtBT&9ISaNZG$L@6MQe#lfkLWkvQRpVw&jen;QTr9J zKht~!a{${Q8WF{~V^;eDc zd!Obf0`7L|AyVEZt4*r4KtFpL>sV4nrj%800lJE)Vq))=?e~`@1!Yv}xqyVnt@c!Q zrODo1RemE3FEb+yo-cVfwcP2I<%P`M+7E9F#-F3NM%NOo&Lb}l0IoE(g69z$Y|$$< z57~X`lKfL}L1|FcNg3uoQ0S{FHm5+pOY9$<>n&alvvc4GAi9sW?;i%{h)6yqlAKW6 z036)UB2p3c$V;5>sw=AxICYXV@ZnEmC55EqaE_TfI_JLa&#EI>ubEr@=z{=C213`y zDZ>1Y-gNU$cpTl?F+T#-`)j^8t8YC`IKdp&gT72h=#9g{KazaT=+;kE3R}>w?l~bF zsZ6;NB{XfM7%GRJOp*6h*n!t6!{O~HgXY$A?oTjMxWztj1oxJ$ph0pxl9rkznwN%) z0b7*$&F_MtZM^jXjcNl3Lc%BFo7JbVmB+Zr5bOrX8Fy7A)6s3vS91Y>?y2C38R^=>&*1l&gv2oUZ&5d2m%HtoHOoqE0WiWJ`z^VSrPvbs2lrkXz2V}!==qzDgOytO)do=R_=bRi5FUQwGmHlUL ze$#mETcLsc2|7d}Q5(OoQ#_yiPvZ=KVhK>u0= z;N+!9`_D@>=|TYjj3fX6_y2VjV5R41s$*gO;C34Xgld>s}zcm-7w z&7`#Vf|UiiLG&KV9EMd1kBGDtLLyVEA-52#_ZJdCk{CcBl`Pe)C!_?;Th88-q>@G8 
z=XV>ZXUQW$dB(f`(JTrO3u^{!Vho7{Qjz|Y28tIXTC-MFo&uH{;09x(iwKO1Fe@g9EQTCn@;^(LaBdgvYa~FhCUgs>7eIiXwF=ChJ4>{2H3FR{)gcf1$?dyA1qp_W z7atYs8rhW=O}7b7sWKcZh%+5v1rz~|E|8puCg?y+B1b(x0R%~RYxz>zKP4MIOjG7a zskuz5KN}rlQN2?+)=fWa9_M>!*7j+r=l8u7F@hPDE0O_4t&f5EzGbB!^~#LEn-Mn| zF4-_2P%WWiD{^C8@;zDHG(Fx|=@?{}TYy=!k~rZgm}<*yi zzFfs*8_7oOAYxg?<}7+Rs2=o1UsStee)h~r=!gEcq{`;-nIHS?<(`ZUCmzz{s^>Os ziecjq=1Vu;YfaUM;>U!PS~f2dmt4-Q%Ar1w2W8*3bh8Z9^g~+2%Vn)KKA__@H)NZ8 zuTWL^4fF6J_ieH%AUcy{nn+atbC)C13xX?Zwu1T%UUv91={Uo}&=o2lhz*Tc5E}^zSFqDdHRxt2Z zpdb8{-$;@?Y%OGv$}9wnp&yb*iDmvYmW-1Aoc>WuJvW~duW29J-7G@({rD#ct*fw_edw_xs!!SwDoEgJ&h*Zmlps!MVOBuGPx&3PU`T1nMt<%TOh}-I( z=vn9E-Qb6|VSb*W?c2qPIX!)5IOv*!D{SfL z&15;Br1Lj13TBnhdE=W3!#|g-#Hg-qAP`^sNexeX9kS<&6cyTpZM`%CTpW4XOLwS~ zmM24v78;cnokI8%+g3$`X;{D4se-h zTp*HyCVqaZd>Gowc5poC?PGnf6Ey5dwXP%?gKnksyw2_<@RLp@ib2U|4sw}@x|Aj{ zu_gMrci<}a7D}eJPCl>g?T=lp5u1|7KSEJn1Wol^|BP6)g1=XuSkT;PJ**<(UZ?opf94}JIY>r;Y;4!AfKl#SD>1Nnq*P4<)b-P%F*_WY%h zw|GzE-ijyjt6$qJUirCdELM|VACCo3UgNdx`dK3^SJHiUFwLM4T{CjRd(FXkPC_x& z{hy9Zopk2ar?{1QTv(Jrd0M7=&#kFr!)Si8AQdn!J(4#5%0pWYMPa zs5>6%pMYK>Tu=~~E!3T0Oqi9cRqr5jeJfYMQyh)c zg`Qt{Bae238oxVA#pT6F4!Fv~qe?+@W}ejKRHB3knIm~!3Wb|rW7>x6dH|Bfo* z!-(_Z-^yV`*FkO4JKm0QwS*08XCBpzxlW%nD@%-9d3M4Mp~7Xlwsh_lCv zpdvuz``x7-XW?>KI+4kyZ`AFH?#ePFL04x4v-g^6#AWtff>k|6-5&Cu?JYH~uBl5W z3Q~RDeD=OR4#uX;mqj9)JO}DRe4#;|9%cHJ>#Hi_zK$P@)VTG)o?^HidR=EN^UY zd#5iOf*4*)rh#`NCoFK%63DNt0PIlYPI#s|Q(bnz+0K|j(kYEMd+8YIBJIGzum_&k{qXJr6t2IeX5e ziF2OOd9)SGRpxi$xTXNK=`iy0IC@_N>!^7C5l}5$8ShTv8$y>%bor*M2J?X~D**(I z0{EYaZ(q~5uksgh?XU888r;7S|BeXvcgw#JME^kijdb=!S^F36-x1^fg%kfTxPQ{* z{%7=GM7h70zoNQ+$zSjJ_o#nI|BE*FPcQ$TIQN$qAOOKvUG*jZlb8PwjqYDR^xtoa ze|i2#5H`a9V~~HhMgE_|{JUBAzryg){2#;o4|8x?2{7=#yg__jBwq~4N7lcz{|7+i Bw3+|_ diff --git a/hipify-clang/src/ArgParse.cpp b/hipify-clang/src/ArgParse.cpp deleted file mode 100644 index cbf2f941e8..0000000000 --- a/hipify-clang/src/ArgParse.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. 
All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "ArgParse.h" - -cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); - -cl::opt OutputFilename("o", - cl::desc("Output filename"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputDir("o-dir", - cl::desc("Output directory"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt GeneratePerl("perl", - cl::desc("Generate hipify-perl"), - cl::value_desc("perl"), - cl::cat(ToolTemplateCategory)); - -cl::opt GeneratePython("python", - cl::desc("Generate hipify-python"), - cl::value_desc("python"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputHipifyPerlDir("o-hipify-perl-dir", - cl::desc("Output directory for hipify-perl script"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputPythonMapDir("o-python-map-dir", - cl::desc("Output directory for Python map"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt TemporaryDir("temp-dir", - cl::desc("Temporary directory"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt CudaPath("cuda-path", - cl::desc("CUDA installation path"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt SaveTemps("save-temps", - cl::desc("Save temporary files"), - cl::value_desc("save-temps"), - cl::cat(ToolTemplateCategory)); - -cl::opt Verbose("v", - cl::desc("Show commands to run and use verbose output"), - cl::value_desc("v"), - cl::cat(ToolTemplateCategory)); - -cl::opt TranslateToRoc("roc", - cl::desc("Translate to roc instead of hip where it is possible"), - cl::value_desc("roc"), - cl::cat(ToolTemplateCategory)); - -cl::opt Inplace("inplace", - cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"), - cl::value_desc("inplace"), - cl::cat(ToolTemplateCategory)); - -cl::opt NoBackup("no-backup", - cl::desc("Don't create a backup file for the hipified source"), - 
cl::value_desc("no-backup"), - cl::cat(ToolTemplateCategory)); - -cl::opt NoOutput("no-output", - cl::desc("Don't write any translated output to stdout"), - cl::value_desc("no-output"), - cl::cat(ToolTemplateCategory)); - -cl::opt PrintStats("print-stats", - cl::desc("Print translation statistics"), - cl::value_desc("print-stats"), - cl::cat(ToolTemplateCategory)); - -cl::opt PrintStatsCSV("print-stats-csv", - cl::desc("Print translation statistics in CSV file"), - cl::value_desc("print-stats-csv"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputStatsFilename("o-stats", - cl::desc("Output filename for statistics"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -cl::opt Examine("examine", - cl::desc("Combines -no-output and -print-stats options"), - cl::value_desc("examine"), - cl::cat(ToolTemplateCategory)); - -cl::opt DashDash(" ", - cl::desc("Separator between hipify-clang and clang options;\ndon't specify if there are no clang options"), - cl::ValueDisallowed, - cl::cat(ToolTemplateCategory)); - -cl::list IncludeDirs("I", - cl::desc("Add directory to include search path"), - cl::value_desc("directory"), - cl::ZeroOrMore, - cl::Prefix, - cl::cat(ToolTemplateCategory)); - -cl::list MacroNames("D", - cl::desc("Define to or 1 if omitted"), - cl::value_desc("macro>= SkipExcludedPPConditionalBlocks("skip-excluded-preprocessor-conditional-blocks", - cl::desc("Enable default preprocessor behaviour by skipping undefined conditional blocks"), - cl::value_desc("skip-excluded-preprocessor-conditional-blocks"), - cl::cat(ToolTemplateCategory)); - -cl::opt CudaGpuArch("cuda-gpu-arch", - cl::desc("CUDA GPU architecture (e.g. 
sm_35);\nmay be specified more than once"), - cl::value_desc("value"), - cl::ZeroOrMore, - cl::Prefix, - cl::cat(ToolTemplateCategory)); - - -cl::extrahelp CommonHelp(ct::CommonOptionsParser::HelpMessage); diff --git a/hipify-clang/src/ArgParse.h b/hipify-clang/src/ArgParse.h deleted file mode 100644 index 84053a036c..0000000000 --- a/hipify-clang/src/ArgParse.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "clang/Tooling/CommonOptionsParser.h" -#include "llvm/Support/CommandLine.h" - -namespace cl = llvm::cl; -namespace ct = clang::tooling; - -extern cl::OptionCategory ToolTemplateCategory; -extern cl::opt OutputFilename; -extern cl::opt OutputHipifyPerlDir; -extern cl::opt OutputPythonMapDir; -extern cl::opt OutputDir; -extern cl::opt TemporaryDir; -extern cl::opt CudaPath; -extern cl::list IncludeDirs; -extern cl::list MacroNames; -extern cl::opt Inplace; -extern cl::opt SaveTemps; -extern cl::opt GeneratePerl; -extern cl::opt GeneratePython; -extern cl::opt Verbose; -extern cl::opt NoBackup; -extern cl::opt NoOutput; -extern cl::opt PrintStats; -extern cl::opt PrintStatsCSV; -extern cl::opt OutputStatsFilename; -extern cl::opt Examine; -extern cl::extrahelp CommonHelp; -extern cl::opt TranslateToRoc; -extern cl::opt DashDash; -extern cl::opt SkipExcludedPPConditionalBlocks; -extern cl::opt CudaGpuArch; diff --git a/hipify-clang/src/CUDA2HIP.cpp b/hipify-clang/src/CUDA2HIP.cpp deleted file mode 100644 index 50f4682af2..0000000000 --- a/hipify-clang/src/CUDA2HIP.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps CUDA header names to HIP header names -const std::map CUDA_INCLUDE_MAP{ - // CUDA includes - {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}}, - {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}}, - {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME}}, - {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_profiler_api.h", {"hip/hip_profile.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME}}, - // cuComplex includes - {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX}}, - // cuBLAS includes - {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS}}, - {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS}}, - // cuRAND includes - {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND}}, - {"curand_kernel.h", 
{"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - // cuDNN includes - {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN}}, - // cuFFT includes - {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT}}, - // cuSPARSE includes - {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}}, - {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}}, - // CUB includes - {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB}}, - // CAFFE2 includes - {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, 
- {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2}}, - {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2}}, -}; - -const std::map& CUDA_RENAMES_MAP() { - static std::map ret; - if (!ret.empty()) { - return ret; - } - // First run, so compute the union map. - ret.insert(CUDA_DRIVER_TYPE_NAME_MAP.begin(), CUDA_DRIVER_TYPE_NAME_MAP.end()); - ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end()); - ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end()); - ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end()); - ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end()); - ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end()); - ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end()); - ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end()); - ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end()); - ret.insert(CUDA_RAND_FUNCTION_MAP.begin(), CUDA_RAND_FUNCTION_MAP.end()); - ret.insert(CUDA_DNN_TYPE_NAME_MAP.begin(), CUDA_DNN_TYPE_NAME_MAP.end()); - ret.insert(CUDA_DNN_FUNCTION_MAP.begin(), CUDA_DNN_FUNCTION_MAP.end()); - ret.insert(CUDA_FFT_TYPE_NAME_MAP.begin(), CUDA_FFT_TYPE_NAME_MAP.end()); - 
ret.insert(CUDA_FFT_FUNCTION_MAP.begin(), CUDA_FFT_FUNCTION_MAP.end()); - ret.insert(CUDA_SPARSE_TYPE_NAME_MAP.begin(), CUDA_SPARSE_TYPE_NAME_MAP.end()); - ret.insert(CUDA_SPARSE_FUNCTION_MAP.begin(), CUDA_SPARSE_FUNCTION_MAP.end()); - ret.insert(CUDA_CAFFE2_TYPE_NAME_MAP.begin(), CUDA_CAFFE2_TYPE_NAME_MAP.end()); - ret.insert(CUDA_CAFFE2_FUNCTION_MAP.begin(), CUDA_CAFFE2_FUNCTION_MAP.end()); - return ret; -}; diff --git a/hipify-clang/src/CUDA2HIP.h b/hipify-clang/src/CUDA2HIP.h deleted file mode 100644 index b02e7f1f3e..0000000000 --- a/hipify-clang/src/CUDA2HIP.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "llvm/ADT/StringRef.h" -#include -#include -#include "Statistics.h" - -// Maps CUDA header names to HIP header names -extern const std::map CUDA_INCLUDE_MAP; -// Maps the names of CUDA DRIVER API types to the corresponding HIP types -extern const std::map CUDA_DRIVER_TYPE_NAME_MAP; -// Maps the names of CUDA DRIVER API functions to the corresponding HIP functions -extern const std::map CUDA_DRIVER_FUNCTION_MAP; -// Maps the names of CUDA RUNTIME API types to the corresponding HIP types -extern const std::map CUDA_RUNTIME_TYPE_NAME_MAP; -// Maps the names of CUDA Complex API types to the corresponding HIP types -extern const std::map CUDA_COMPLEX_TYPE_NAME_MAP; -// Maps the names of CUDA Complex API functions to the corresponding HIP functions -extern const std::map CUDA_COMPLEX_FUNCTION_MAP; -// Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions -extern const std::map CUDA_RUNTIME_FUNCTION_MAP; -// Maps the names of CUDA BLAS API types to the corresponding HIP types -extern const std::map CUDA_BLAS_TYPE_NAME_MAP; -// Maps the names of CUDA BLAS API functions to the corresponding HIP functions -extern const std::map CUDA_BLAS_FUNCTION_MAP; -// Maps the names of CUDA RAND API types to the corresponding HIP types -extern const std::map CUDA_RAND_TYPE_NAME_MAP; -// Maps the names of CUDA RAND API functions to the corresponding HIP functions -extern const std::map CUDA_RAND_FUNCTION_MAP; -// Maps the names of CUDA DNN API types to the corresponding HIP types -extern const std::map CUDA_DNN_TYPE_NAME_MAP; -// Maps the names of CUDA DNN API functions to the corresponding HIP functions -extern const std::map CUDA_DNN_FUNCTION_MAP; -// Maps the names of CUDA FFT API types to the corresponding HIP types -extern const std::map CUDA_FFT_TYPE_NAME_MAP; -// Maps the names of CUDA FFT API functions to the corresponding HIP functions -extern const std::map CUDA_FFT_FUNCTION_MAP; -// Maps the names of CUDA SPARSE API 
types to the corresponding HIP types -extern const std::map CUDA_SPARSE_TYPE_NAME_MAP; -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -extern const std::map CUDA_SPARSE_FUNCTION_MAP; -// Maps the names of CUDA CAFFE2 API types to the corresponding HIP types -extern const std::map CUDA_CAFFE2_TYPE_NAME_MAP; -// Maps the names of CUDA CAFFE2 API functions to the corresponding HIP functions -extern const std::map CUDA_CAFFE2_FUNCTION_MAP; -// Maps the names of CUDA Device functions to the corresponding HIP functions -extern const std::map CUDA_DEVICE_FUNC_MAP; -// Maps the names of CUDA CUB API types to the corresponding HIP types -extern const std::map CUDA_CUB_TYPE_NAME_MAP; - -/** - * The union of all the above maps, except includes. - * - * This should be used rarely, but is still needed to convert macro definitions (which can - * contain any combination of the above things). AST walkers can usually get away with just - * looking in the lookup table for the type of element they are processing, however, saving - * a great deal of time. - */ -const std::map& CUDA_RENAMES_MAP(); diff --git a/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp b/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp deleted file mode 100644 index 8c9d3ea662..0000000000 --- a/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp +++ /dev/null @@ -1,671 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_BLAS_FUNCTION_MAP{ - - // Blas management functions - {"cublasInit", {"hipblasInit", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas logging - {"cublasLogCallback", {"hipblasLogCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasLoggerConfigure", {"hipblasLoggerConfigure", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetLoggerCallback", {"hipblasSetLoggerCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetLoggerCallback", {"hipblasGetLoggerCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas1 (v1) Routines - {"cublasCreate", {"hipblasCreate", "rocblas_create_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDestroy", {"hipblasDestroy", "rocblas_destroy_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetStream", {"hipblasSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetStream", {"hipblasGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetPointerMode", {"hipblasSetPointerMode", "rocblas_set_pointer_mode", CONV_LIB_FUNC, 
API_BLAS}}, - {"cublasGetPointerMode", {"hipblasGetPointerMode", "rocblas_get_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetVector", {"hipblasSetVector", "rocblas_set_vector", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetVector", {"hipblasGetVector", "rocblas_get_vector", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetVectorAsync", {"hipblasSetVectorAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetVectorAsync", {"hipblasGetVectorAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetMatrix", {"hipblasSetMatrix", "rocblas_set_matrix", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetMatrix", {"hipblasGetMatrix", "rocblas_get_matrix", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetMatrixAsync", {"hipblasSetMatrixAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetMatrixAsync", {"hipblasGetMatrixAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasXerbla", {"hipblasXerbla", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // NRM2 - {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasNrm2Ex", {"hipblasNrm2Ex", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // DOT - {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS}}, - - // SCAL - {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDscal", {"hipblasDscal", "rocblas_dscal", 
CONV_LIB_FUNC, API_BLAS}}, - {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS}}, - - // AXPY - {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDaxpy", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCaxpy", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZaxpy", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS}}, - - // COPY - {"cublasScopy", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDcopy", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCcopy", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZcopy", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS}}, - - // SWAP - {"cublasSswap", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDswap", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCswap", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZswap", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS}}, - - // AMAX - {"cublasIsamax", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamax", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamax", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamax", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS}}, - - // AMIN - {"cublasIsamin", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamin", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamin", {"hipblasIcamin", "rocblas_icamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamin", {"hipblasIzamin", "rocblas_izamin", 
CONV_LIB_FUNC, API_BLAS}}, - - // ASUM - {"cublasSasum", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDasum", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScasum", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDzasum", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS}}, - - // ROT - {"cublasSrot", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrot", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrot", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsrot", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrot", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdrot", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, API_BLAS}}, - - // ROTG - {"cublasSrotg", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotg", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrotg", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrotg", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS}}, - - // ROTM - {"cublasSrotm", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotm", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS}}, - - // ROTMG - {"cublasSrotmg", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotmg", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS}}, - - // GEMV - {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS}}, - // NOTE: there is no such a function in CUDA - {"cublasSgemvBatched", {"hipblasSgemvBatched", "rocblas_sgemv_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemv", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemv", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS}}, - - // GBMV - 
{"cublasSgbmv", {"hipblasSgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgbmv", {"hipblasDgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgbmv", {"hipblasCgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgbmv", {"hipblasZgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMV - {"cublasStrmv", {"hipblasStrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrmv", {"hipblasDtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrmv", {"hipblasCtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmv", {"hipblasZtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBMV - {"cublasStbmv", {"hipblasStbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbmv", {"hipblasDtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbmv", {"hipblasCtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbmv", {"hipblasZtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPMV - {"cublasStpmv", {"hipblasStpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpmv", {"hipblasDtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpmv", {"hipblasCtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpmv", {"hipblasZtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSV - {"cublasStrsv", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsv", {"hipblasCtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsv", {"hipblasZtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPSV - {"cublasStpsv", {"hipblasStpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpsv", {"hipblasDtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpsv", {"hipblasCtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpsv", {"hipblasZtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBSV - {"cublasStbsv", 
{"hipblasStbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbsv", {"hipblasDtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbsv", {"hipblasCtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbsv", {"hipblasZtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMV/HEMV - {"cublasSsymv", {"hipblasSsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymv", {"hipblasDsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymv", {"hipblasCsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymv", {"hipblasZsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChemv", {"hipblasChemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemv", {"hipblasZhemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SBMV/HBMV - {"cublasSsbmv", {"hipblasSsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsbmv", {"hpiblasDsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChbmv", {"hipblasChbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhbmv", {"hipblasZhbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPMV/HPMV - {"cublasSspmv", {"hipblasSspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspmv", {"hipblasDspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpmv", {"hipblasChpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpmv", {"hipblasZhpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GER - {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeru", {"hipblasCgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgerc", {"hipblasCgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeru", {"hipblasZgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgerc", {"hipblasZgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR/HER - {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", 
CONV_LIB_FUNC, API_BLAS}}, - {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsyr", {"hipblasCsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr", {"hipblasZsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher", {"hipblasCher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher", {"hipblasZher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR/HPR - {"cublasSspr", {"hipblasSspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr", {"hipblasDspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr", {"hipblasChpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr", {"hipblasZhpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2/HER2 - {"cublasSsyr2", {"hipblasSsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2", {"hipblasDsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2", {"hipblasCsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2", {"hipblasZsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher2", {"hipblasCher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2", {"hipblasZher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR2/HPR2 - {"cublasSspr2", {"hipblasSspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr2", {"hipblasDspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr2", {"hipblasChpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr2", {"hipblasZhpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas3 (v1) Routines - // GEMM - {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm", {"hipblasCgemm", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemm", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemm", {"hipblasHgemm", "rocblas_hgemm", CONV_LIB_FUNC, API_BLAS}}, - - // 
BATCH GEMM - {"cublasSgemmBatched", {"hipblasSgemmBatched", "rocblas_sgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemmBatched", {"hipblasDgemmBatched", "rocblas_dgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemmBatched", {"hipblasHgemmBatched", "rocblas_hgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSgemmStridedBatched", {"hipblasSgemmStridedBatched", "rocblas_sgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemmStridedBatched", {"hipblasDgemmStridedBatched", "rocblas_dgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemmBatched", {"hipblasCgemmBatched", "rocblas_cgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm3mBatched", {"hipblasCgemm3mBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemmBatched", {"hipblasZgemmBatched", "rocblas_zgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemmStridedBatched", {"hipblasCgemmStridedBatched", "rocblas_cgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm3mStridedBatched", {"hipblasCgemm3mStridedBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemmStridedBatched", {"hipblasZgemmStridedBatched", "rocblas_zgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - - // SYRK - {"cublasSsyrk", {"hipblasSsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrk", {"hipblasDsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrk", {"hipblasCsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrk", {"hipblasZsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERK - {"cublasCherk", {"hipblasCherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherk", {"hipblasZherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2K - {"cublasSsyr2k", {"hipblasSsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2k", {"hipblasDsyr2k", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCsyr2k", {"hipblasCsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2k", {"hipblasZsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYRKX - eXtended SYRK - {"cublasSsyrkx", {"hipblasSsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrkx", {"hipblasDsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrkx", {"hipblasCsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrkx", {"hipblasZsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HER2K - {"cublasCher2k", {"hipblasCher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2k", {"hipblasZher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERKX - eXtended HERK - {"cublasCherkx", {"hipblasCherkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherkx", {"hipblasZherkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMM - {"cublasSsymm", {"hipblasSsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymm", {"hipblasDsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymm", {"hipblasCsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymm", {"hipblasZsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HEMM - {"cublasChemm", {"hipblasChemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemm", {"hipblasZhemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsm", {"hipblasCtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsm", {"hipblasZtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMM - {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasCtrmm", {"hipblasCtrmm", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasZtrmm", {"hipblasZtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) - // GEAM - {"cublasSgeam", {"hipblasSgeam", "rocblas_sgeam", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgeam", {"hipblasDgeam", "rocblas_dgeam", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeam", {"hipblasCgeam", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeam", {"hipblasZgeam", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GETRF - Batched LU - {"cublasSgetrfBatched", {"hipblasSgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetrfBatched", {"hipblasDgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetrfBatched", {"hipblasCgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetrfBatched", {"hipblasZgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batched inversion based on LU factorization from getrf - {"cublasSgetriBatched", {"hipblasSgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetriBatched", {"hipblasDgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetriBatched", {"hipblasCgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetriBatched", {"hipblasZgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batched solver based on LU factorization from getrf - {"cublasSgetrsBatched", {"hipblasSgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetrsBatched", {"hipblasDgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetrsBatched", {"hipblasCgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetrsBatched", {"hipblasZgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - Batched Triangular Solver - {"cublasStrsmBatched", {"hipblasStrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrsmBatched", {"hipblasDtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCtrsmBatched", {"hipblasCtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsmBatched", {"hipblasZtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // MATINV - Batched - {"cublasSmatinvBatched", {"hipblasSmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDmatinvBatched", {"hipblasDmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCmatinvBatched", {"hipblasCmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZmatinvBatched", {"hipblasZmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batch QR Factorization - {"cublasSgeqrfBatched", {"hipblasSgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgeqrfBatched", {"hipblasDgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgeqrfBatched", {"hipblasCgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeqrfBatched", {"hipblasZgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Least Square Min only m >= n and Non-transpose supported - {"cublasSgelsBatched", {"hipblasSgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgelsBatched", {"hipblasDgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgelsBatched", {"hipblasCgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgelsBatched", {"hipblasZgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // DGMM - {"cublasSdgmm", {"hipblasSdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDdgmm", {"hipblasDdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCdgmm", {"hipblasCdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZdgmm", {"hipblasZdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPTTR - Triangular Pack format to Triangular format - {"cublasStpttr", {"hipblasStpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpttr", {"hipblasDtpttr", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCtpttr", {"hipblasCtpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpttr", {"hipblasZtpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRTTP - Triangular format to Triangular Pack format - {"cublasStrttp", {"hipblasStrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrttp", {"hipblasDtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrttp", {"hipblasCtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrttp", {"hipblasZtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas2 (v2) Routines - {"cublasCreate_v2", {"hipblasCreate", "rocblas_create_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDestroy_v2", {"hipblasDestroy", "rocblas_destroy_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetVersion_v2", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetProperty", {"hipblasGetProperty", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetStream_v2", {"hipblasSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetStream_v2", {"hipblasGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetPointerMode_v2", {"hipblasGetPointerMode", "rocblas_set_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetPointerMode_v2", {"hipblasSetPointerMode", "rocblas_get_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetCudartVersion", {"hipblasGetCudartVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GEMV - {"cublasSgemv_v2", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemv_v2", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemv_v2", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemv_v2", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS}}, - - // GBMV - {"cublasSgbmv_v2", {"hipblasSgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgbmv_v2", {"hipblasDgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - 
{"cublasCgbmv_v2", {"hipblasCgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgbmv_v2", {"hipblasZgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMV - {"cublasStrmv_v2", {"hipblasStrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrmv_v2", {"hipblasDtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrmv_v2", {"hipblasCtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmv_v2", {"hipblasZtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBMV - {"cublasStbmv_v2", {"hipblasStbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbmv_v2", {"hipblasDtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbmv_v2", {"hipblasCtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbmv_v2", {"hipblasZtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPMV - {"cublasStpmv_v2", {"hipblasStpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpmv_v2", {"hipblasDtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpmv_v2", {"hipblasCtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpmv_v2", {"hipblasZtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSV - {"cublasStrsv_v2", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsv_v2", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsv_v2", {"hipblasCtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsv_v2", {"hipblasZtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPSV - {"cublasStpsv_v2", {"hipblasStpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpsv_v2", {"hipblasDtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpsv_v2", {"hipblasCtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpsv_v2", {"hipblasZtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBSV - {"cublasStbsv_v2", {"hipblasStbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbsv_v2", {"hipblasDtbsv", "", 
CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbsv_v2", {"hipblasCtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbsv_v2", {"hipblasZtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMV/HEMV - {"cublasSsymv_v2", {"hipblasSsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymv_v2", {"hipblasDsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymv_v2", {"hipblasCsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymv_v2", {"hipblasZsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChemv_v2", {"hipblasChemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemv_v2", {"hipblasZhemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SBMV/HBMV - {"cublasSsbmv_v2", {"hipblasSsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsbmv_v2", {"hpiblasDsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChbmv_v2", {"hipblasChbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhbmv_v2", {"hipblasZhbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPMV/HPMV - {"cublasSspmv_v2", {"hipblasSspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspmv_v2", {"hipblasDspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpmv_v2", {"hipblasChpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpmv_v2", {"hipblasZhpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GER - {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDger_v2", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeru_v2", {"hipblasCgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgerc_v2", {"hipblasCgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeru_v2", {"hipblasZgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgerc_v2", {"hipblasZgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR/HER - {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS}}, - 
{"cublasDsyr_v2", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsyr_v2", {"hipblasCsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr_v2", {"hipblasZsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher_v2", {"hipblasCher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher_v2", {"hipblasZher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR/HPR - {"cublasSspr_v2", {"hipblasSspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr_v2", {"hipblasDspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr_v2", {"hipblasChpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr_v2", {"hipblasZhpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2/HER2 - {"cublasSsyr2_v2", {"hipblasSsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2_v2", {"hipblasDsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2_v2", {"hipblasCsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2_v2", {"hipblasZsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher2_v2", {"hipblasCher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2_v2", {"hipblasZher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR2/HPR2 - {"cublasSspr2_v2", {"hipblasSspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr2_v2", {"hipblasDspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr2_v2", {"hipblasChpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr2_v2", {"hipblasZhpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas3 (v2) Routines - // GEMM - {"cublasSgemm_v2", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemm_v2", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm_v2", {"hipblasCgemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgemm3m", {"hipblasCgemm3m", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgemm3mEx", {"hipblasCgemm3mEx", "", 
CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemm_v2", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemm3m", {"hipblasZgemm3m", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - //IO in FP16 / FP32, computation in float - {"cublasSgemmEx", {"hipblasSgemmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGemmEx", {"hipblasGemmEx", "rocblas_gemm_ex", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGemmBatchedEx", {"hipblasGemmBatchedEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCgemmEx", {"hipblasCgemmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasUint8gemmBias", {"hipblasUint8gemmBias", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYRK - {"cublasSsyrk_v2", {"hipblasSsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrk_v2", {"hipblasDsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrk_v2", {"hipblasCsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrk_v2", {"hipblasZsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCsyrkEx", {"hipblasCsyrkEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math - {"cublasCsyrk3mEx", {"hipblasCsyrk3mEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERK - {"cublasCherk_v2", {"hipblasCherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math - {"cublasCherk3mEx", {"hipblasCherk3mEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherk_v2", {"hipblasZherk", "", CONV_LIB_FUNC, 
API_BLAS, UNSUPPORTED}}, - - // SYR2K - {"cublasSsyr2k_v2", {"hipblasSsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2k_v2", {"hipblasDsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2k_v2", {"hipblasCsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2k_v2", {"hipblasZsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HER2K - {"cublasCher2k_v2", {"hipblasCher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2k_v2", {"hipblasZher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMM - {"cublasSsymm_v2", {"hipblasSsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymm_v2", {"hipblasDsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymm_v2", {"hipblasCsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymm_v2", {"hipblasZsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HEMM - {"cublasChemm_v2", {"hipblasChemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemm_v2", {"hipblasZhemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsm_v2", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsm_v2", {"hipblasCtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsm_v2", {"hipblasZtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMM - {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasCtrmm_v2", {"hipblasCtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmm_v2", {"hipblasZtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // NRM2 - {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDnrm2_v2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScnrm2_v2", 
{"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDznrm2_v2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS}}, - - // DOT - {"cublasDotEx", {"hipblasDotEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDotcEx", {"hipblasDotcEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - {"cublasSdot_v2", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDdot_v2", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS}}, - - {"cublasCdotu_v2", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotc_v2", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS,}}, - {"cublasZdotu_v2", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotc_v2", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS}}, - - // SCAL - {"cublasScalEx", {"hipblasScalEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSscal_v2", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDscal_v2", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCscal_v2", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsscal_v2", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZscal_v2", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdscal_v2", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS}}, - - // AXPY - {"cublasAxpyEx", {"hipblasAxpyEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSaxpy_v2", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDaxpy_v2", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCaxpy_v2", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZaxpy_v2", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS}}, - - // COPY - {"cublasCopyEx", {"hipblasCopyEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasScopy_v2", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS}}, - 
{"cublasDcopy_v2", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCcopy_v2", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZcopy_v2", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS}}, - - // SWAP - {"cublasSwapEx", {"hipblasSwapEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSswap_v2", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDswap_v2", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCswap_v2", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZswap_v2", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS}}, - - // AMAX - {"cublasIamaxEx", {"hipblasIamaxEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasIsamax_v2", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamax_v2", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamax_v2", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamax_v2", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS}}, - - // AMIN - {"cublasIaminEx", {"hipblasIaminEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasIsamin_v2", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamin_v2", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamin_v2", {"hipblasIcamin", "rocblas_icamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamin_v2", {"hipblasIzamin", "rocblas_izamin", CONV_LIB_FUNC, API_BLAS}}, - - // ASUM - {"cublasAsumEx", {"hipblasAsumEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSasum_v2", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDasum_v2", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScasum_v2", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDzasum_v2", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS}}, - - // ROT - {"cublasRotEx", 
{"hipblasRotEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrot_v2", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrot_v2", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrot_v2", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsrot_v2", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrot_v2", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdrot_v2", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, API_BLAS}}, - - // ROTG - {"cublasRotgEx", {"hipblasRotgEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotg_v2", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotg_v2", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrotg_v2", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrotg_v2", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS}}, - - // ROTM - {"cublasRotmEx", {"hipblasRotmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotm_v2", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotm_v2", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS}}, - - // ROTMG - {"cublasRotmgEx", {"hipblasRotmgEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotmg_v2", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotmg_v2", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp b/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp deleted file mode 100644 index a747eb0040..0000000000 --- a/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_BLAS_TYPE_NAME_MAP{ - // Blas defines - {"CUBLAS_VER_MAJOR", {"HIPBLAS_VER_MAJOR", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_MINOR", {"HIPBLAS_VER_MINOR", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_PATCH", {"HIPBLAS_VER_PATCH", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_BUILD", {"HIPBLAS_VER_BUILD", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VERSION", {"HIPBLAS_VERSION", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - - // Blas operations - {"cublasOperation_t", {"hipblasOperation_t", "rocblas_operation", CONV_TYPE, API_BLAS}}, - {"CUBLAS_OP_N", {"HIPBLAS_OP_N", "rocblas_operation_none", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_T", {"HIPBLAS_OP_T", "rocblas_operation_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_C", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_HERMITAN", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "rocblas_operation_conjugate", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blas statuses - {"cublasStatus", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS}}, - {"cublasStatus_t", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS}}, - {"CUBLAS_STATUS_SUCCESS", {"HIPBLAS_STATUS_SUCCESS", "rocblas_status_success", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_NOT_INITIALIZED", {"HIPBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_ALLOC_FAILED", {"HIPBLAS_STATUS_ALLOC_FAILED", "rocblas_status_memory_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_INVALID_VALUE", {"HIPBLAS_STATUS_INVALID_VALUE", "rocblas_status_invalid_pointer", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_MAPPING_ERROR", {"HIPBLAS_STATUS_MAPPING_ERROR", 
"rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_EXECUTION_FAILED", {"HIPBLAS_STATUS_EXECUTION_FAILED", "rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_INTERNAL_ERROR", {"HIPBLAS_STATUS_INTERNAL_ERROR", "rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_NOT_SUPPORTED", {"HIPBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_ARCH_MISMATCH", {"HIPBLAS_STATUS_ARCH_MISMATCH", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_LICENSE_ERROR", {"HIPBLAS_STATUS_LICENSE_ERROR", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blas Fill Modes - {"cublasFillMode_t", {"hipblasFillMode_t", "rocblas_fill", CONV_TYPE, API_BLAS}}, - {"CUBLAS_FILL_MODE_LOWER", {"HIPBLAS_FILL_MODE_LOWER", "rocblas_fill_lower", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_FILL_MODE_UPPER", {"HIPBLAS_FILL_MODE_UPPER", "rocblas_fill_upper", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_FILL_MODE_FULL", {"HIPBLAS_FILL_MODE_FULL", "rocblas_fill_full", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Diag Types - {"cublasDiagType_t", {"hipblasDiagType_t", "rocblas_diagonal", CONV_TYPE, API_BLAS}}, - {"CUBLAS_DIAG_NON_UNIT", {"HIPBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_DIAG_UNIT", {"HIPBLAS_DIAG_UNIT", "rocblas_diagonal_unit", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Side Modes - {"cublasSideMode_t", {"hipblasSideMode_t", "rocblas_side", CONV_TYPE, API_BLAS}}, - {"CUBLAS_SIDE_LEFT", {"HIPBLAS_SIDE_LEFT", "rocblas_side_left", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_SIDE_RIGHT", {"HIPBLAS_SIDE_RIGHT", "rocblas_side_right", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Pointer Modes - {"cublasPointerMode_t", {"hipblasPointerMode_t", "rocblas_pointer_mode", CONV_TYPE, API_BLAS}}, - {"CUBLAS_POINTER_MODE_HOST", 
{"HIPBLAS_POINTER_MODE_HOST", "rocblas_pointer_mode_host", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_POINTER_MODE_DEVICE", {"HIPBLAS_POINTER_MODE_DEVICE", "rocblas_pointer_mode_device", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Atomics Modes - {"cublasAtomicsMode_t", {"hipblasAtomicsMode_t", "rocblas_atomics_mode", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_ATOMICS_NOT_ALLOWED", {"HIPBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - - // Blas Data Type - {"cublasDataType_t", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_BLAS}}, - - // Blas Math mode/tensor operation - {"cublasMath_t", {"hipblasMath_t", "", CONV_TYPE, API_BLAS, UNSUPPORTED}}, - {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blass different GEMM algorithms - {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", "rocblas_gemm_algo", CONV_TYPE, API_BLAS}}, - {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DEFAULT", "rocblas_gemm_algo_standard", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 // 0b0000000000 - {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", "rocblas_gemm_algo_standard", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 // 0b0000000000 - {"CUBLAS_GEMM_ALGO0", {"HIPBLAS_GEMM_ALGO0", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 0 - {"CUBLAS_GEMM_ALGO1", {"HIPBLAS_GEMM_ALGO1", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 1 - {"CUBLAS_GEMM_ALGO2", {"HIPBLAS_GEMM_ALGO2", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 2 - {"CUBLAS_GEMM_ALGO3", {"HIPBLAS_GEMM_ALGO3", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 3 - {"CUBLAS_GEMM_ALGO4", {"HIPBLAS_GEMM_ALGO4", "", CONV_NUMERIC_LITERAL, API_BLAS, 
UNSUPPORTED}}, // 4 - {"CUBLAS_GEMM_ALGO5", {"HIPBLAS_GEMM_ALGO5", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 5 - {"CUBLAS_GEMM_ALGO6", {"HIPBLAS_GEMM_ALGO6", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 6 - {"CUBLAS_GEMM_ALGO7", {"HIPBLAS_GEMM_ALGO7", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 7 - {"CUBLAS_GEMM_ALGO8", {"HIPBLAS_GEMM_ALGO8", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 8 - {"CUBLAS_GEMM_ALGO9", {"HIPBLAS_GEMM_ALGO9", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 9 - {"CUBLAS_GEMM_ALGO10", {"HIPBLAS_GEMM_ALGO10", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 10 - {"CUBLAS_GEMM_ALGO11", {"HIPBLAS_GEMM_ALGO11", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 11 - {"CUBLAS_GEMM_ALGO12", {"HIPBLAS_GEMM_ALGO12", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 12 - {"CUBLAS_GEMM_ALGO13", {"HIPBLAS_GEMM_ALGO13", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 13 - {"CUBLAS_GEMM_ALGO14", {"HIPBLAS_GEMM_ALGO14", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 14 - {"CUBLAS_GEMM_ALGO15", {"HIPBLAS_GEMM_ALGO15", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 15 - {"CUBLAS_GEMM_ALGO16", {"HIPBLAS_GEMM_ALGO16", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 16 - {"CUBLAS_GEMM_ALGO17", {"HIPBLAS_GEMM_ALGO17", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 17 - {"CUBLAS_GEMM_ALGO18", {"HIPBLAS_GEMM_ALGO18", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 18 - {"CUBLAS_GEMM_ALGO19", {"HIPBLAS_GEMM_ALGO19", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 19 - {"CUBLAS_GEMM_ALGO20", {"HIPBLAS_GEMM_ALGO20", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 20 - {"CUBLAS_GEMM_ALGO21", {"HIPBLAS_GEMM_ALGO21", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 21 - {"CUBLAS_GEMM_ALGO22", {"HIPBLAS_GEMM_ALGO22", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 22 - {"CUBLAS_GEMM_ALGO23", {"HIPBLAS_GEMM_ALGO23", "", CONV_NUMERIC_LITERAL, API_BLAS, 
UNSUPPORTED}}, // 23 - {"CUBLAS_GEMM_DEFAULT_TENSOR_OP", {"HIPBLAS_GEMM_DEFAULT_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 99 - {"CUBLAS_GEMM_DFALT_TENSOR_OP", {"HIPBLAS_GEMM_DFALT_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 99 - {"CUBLAS_GEMM_ALGO0_TENSOR_OP", {"HIPBLAS_GEMM_ALGO0_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 100 - {"CUBLAS_GEMM_ALGO1_TENSOR_OP", {"HIPBLAS_GEMM_ALGO1_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 101 - {"CUBLAS_GEMM_ALGO2_TENSOR_OP", {"HIPBLAS_GEMM_ALGO2_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 102 - {"CUBLAS_GEMM_ALGO3_TENSOR_OP", {"HIPBLAS_GEMM_ALGO3_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 103 - {"CUBLAS_GEMM_ALGO4_TENSOR_OP", {"HIPBLAS_GEMM_ALGO4_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 104 - {"CUBLAS_GEMM_ALGO5_TENSOR_OP", {"HIPBLAS_GEMM_ALGO5_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 105 - {"CUBLAS_GEMM_ALGO6_TENSOR_OP", {"HIPBLAS_GEMM_ALGO6_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 106 - {"CUBLAS_GEMM_ALGO7_TENSOR_OP", {"HIPBLAS_GEMM_ALGO7_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 107 - {"CUBLAS_GEMM_ALGO8_TENSOR_OP", {"HIPBLAS_GEMM_ALGO8_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 108 - {"CUBLAS_GEMM_ALGO9_TENSOR_OP", {"HIPBLAS_GEMM_ALGO9_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 109 - {"CUBLAS_GEMM_ALGO10_TENSOR_OP", {"HIPBLAS_GEMM_ALGO10_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 110 - {"CUBLAS_GEMM_ALGO11_TENSOR_OP", {"HIPBLAS_GEMM_ALGO11_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 111 - {"CUBLAS_GEMM_ALGO12_TENSOR_OP", {"HIPBLAS_GEMM_ALGO12_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 112 - {"CUBLAS_GEMM_ALGO13_TENSOR_OP", {"HIPBLAS_GEMM_ALGO13_TENSOR_OP", "", CONV_NUMERIC_LITERAL, 
API_BLAS, UNSUPPORTED}}, // 113 - {"CUBLAS_GEMM_ALGO14_TENSOR_OP", {"HIPBLAS_GEMM_ALGO14_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 114 - {"CUBLAS_GEMM_ALGO15_TENSOR_OP", {"HIPBLAS_GEMM_ALGO15_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 115 - - // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP - {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_RUNTIME}}, - {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_RUNTIME}}, - {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 // 150 - {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 // 153 - {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 // 151 - {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 // 154 - {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 // 152 - {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 // 155 - {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 // 160 - {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 // 164 - {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 // 161 - {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 // 165 - {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 10 // 162 - {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 11 // 166 - {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 12 // 163 - {"CUDA_C_32U", 
{"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 // 167 - - {"cublasHandle_t", {"hipblasHandle_t", "rocblas_handle", CONV_TYPE, API_BLAS}}, - // TODO: dereferencing: typedef struct cublasContext *cublasHandle_t; - {"cublasContext", {"hipblasHandle_t", "_rocblas_handle", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp b/hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp deleted file mode 100644 index 63860de262..0000000000 --- a/hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -const std::map CUDA_CAFFE2_FUNCTION_MAP{ - {"cuda_stream", {"hip_stream", "", CONV_LIB_FUNC, API_CAFFE2}}, -}; \ No newline at end of file diff --git a/hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp b/hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp deleted file mode 100644 index 4791cffeee..0000000000 --- a/hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all types -const std::map CUDA_CAFFE2_TYPE_NAME_MAP{ - - // 5. Defines - {"REGISTER_CUDA_OPERATOR", {"REGISTER_HIP_OPERATOR", "", CONV_DEFINE, API_CAFFE2}}, - {"REGISTER_CUDA_OPERATOR_CREATOR", {"REGISTER_HIP_OPERATOR_CREATOR", "", CONV_DEFINE, API_CAFFE2}}, - - // 6. 
Classes - {"CUDAContext", {"HIPContext", "", CONV_TYPE, API_CAFFE2}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_CUB_API_types.cpp b/hipify-clang/src/CUDA2HIP_CUB_API_types.cpp deleted file mode 100644 index 0ef1912b54..0000000000 --- a/hipify-clang/src/CUDA2HIP_CUB_API_types.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA CUB API types to the corresponding HIP types -const std::map CUDA_CUB_TYPE_NAME_MAP{ - {"cub", {"hipcub", "", CONV_TYPE, API_CUB}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp deleted file mode 100644 index 6e0c1a54e7..0000000000 --- a/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA Complex API functions to the corresponding HIP functions -const std::map CUDA_COMPLEX_FUNCTION_MAP{ - {"cuCrealf", {"hipCrealf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimagf", {"hipCimagf", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuFloatComplex", {"make_hipFloatComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuConjf", {"hipConjf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCaddf", {"hipCaddf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsubf", {"hipCsubf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmulf", {"hipCmulf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdivf", {"hipCdivf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabsf", {"hipCabsf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCreal", {"hipCreal", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimag", {"hipCimag", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuDoubleComplex", {"make_hipDoubleComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuConj", {"hipConj", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCadd", {"hipCadd", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsub", {"hipCsub", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmul", {"hipCmul", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdiv", {"hipCdiv", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabs", {"hipCabs", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuComplex", {"make_hipComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfmaf", {"hipCfmaf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfma", {"hipCfma", "", CONV_COMPLEX, API_COMPLEX}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp b/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp deleted file mode 100644 index 87016a21a0..0000000000 --- a/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA Complex API types to the corresponding HIP types -const std::map CUDA_COMPLEX_TYPE_NAME_MAP{ - {"cuFloatComplex", {"hipFloatComplex", "", CONV_TYPE, API_COMPLEX}}, - {"cuDoubleComplex", {"hipDoubleComplex", "", CONV_TYPE, API_COMPLEX}}, - {"cuComplex", {"hipComplex", "", CONV_TYPE, API_COMPLEX}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp b/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp deleted file mode 100644 index 765ce78a26..0000000000 --- a/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_DNN_FUNCTION_MAP{ - - {"cudnnGetVersion", {"hipdnnGetVersion", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetProperty", {"hipdnnGetProperty", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetErrorString", {"hipdnnGetErrorString", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnIm2Col", {"hipdnnIm2Col", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreate", {"hipdnnCreate", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroy", {"hipdnnDestroy", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetStream", {"hipdnnSetStream", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetStream", {"hipdnnGetStream", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetCallback", {"hipdnnSetCallback", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCallback", {"hipdnnGetCallback", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Tensor functions - {"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor4dDescriptorEx", {"hipdnnSetTensor4dDescriptorEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetTensor4dDescriptor", {"hipdnnGetTensor4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensorNdDescriptor", {"hipdnnSetTensorNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensorNdDescriptorEx", {"hipdnnSetTensorNdDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetTensorNdDescriptor", {"hipdnnGetTensorNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetTensorSizeInBytes", {"hipdnnGetTensorSizeInBytes", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", "", 
CONV_LIB_FUNC, API_DNN}}, - {"cudnnTransformTensor", {"hipdnnTransformTensor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTransformTensorEx", {"hipdnnTransformTensorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnInitTransformDest", {"hipdnnInitTransformDest", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateTensorTransformDescriptor", {"hipdnnCreateTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetTensorTransformDescriptor", {"hipdnnSetTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetTensorTransformDescriptor", {"hipdnnGetTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyTensorTransformDescriptor", {"hipdnnDestroyTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAddTensor", {"hipdnnAddTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnCreateOpTensorDescriptor", {"hipdnnCreateOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetOpTensorDescriptor", {"hipdnnSetOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetOpTensorDescriptor", {"hipdnnGetOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyOpTensorDescriptor", {"hipdnnDestroyOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnOpTensor", {"hipdnnOpTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFoldedConvBackwardDataDescriptors", {"hipdnnGetFoldedConvBackwardDataDescriptors", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Reduce Tensor functions - {"cudnnCreateReduceTensorDescriptor", {"hipdnnCreateReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetReduceTensorDescriptor", {"hipdnnSetReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetReduceTensorDescriptor", {"hipdnnGetReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyReduceTensorDescriptor", {"hipdnnDestroyReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnGetReductionIndicesSize", {"hipdnnGetReductionIndicesSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetReductionWorkspaceSize", {"hipdnnGetReductionWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnReduceTensor", {"hipdnnReduceTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor", {"hipdnnSetTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnScaleTensor", {"hipdnnScaleTensor", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Filter functions - {"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilter4dDescriptor", {"hipdnnGetFilter4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetFilterNdDescriptor", {"hipdnnSetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilterSizeInBytes", {"hipdnnGetFilterSizeInBytes", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTransformFilter", {"hipdnnTransformFilter", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnReorderFilterAndBias", {"hipdnnReorderFilterAndBias", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Convolution functions - {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetConvolutionMathType", {"hipdnnSetConvolutionMathType", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionMathType", {"hipdnnGetConvolutionMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolutionGroupCount", {"hipdnnSetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionGroupCount", {"hipdnnGetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolutionReorderType", 
{"hipdnnSetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionReorderType", {"hipdnnGetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolution2dDescriptor", {"hipdnnGetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetConvolutionNdDescriptor", {"hipdnnSetConvolutionNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionNdDescriptor", {"hipdnnGetConvolutionNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionNdForwardOutputDim", {"hipdnnGetConvolutionNdForwardOutputDim", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithmMaxCount", {"hipdnnGetConvolutionForwardAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionForwardAlgorithm", {"hipdnnFindConvolutionForwardAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionForwardAlgorithmEx", {"hipdnnFindConvolutionForwardAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithm_v7", {"hipdnnGetConvolutionForwardAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionForward", {"hipdnnConvolutionForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBiasActivationForward", {"hipdnnConvolutionBiasActivationForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - 
{"cudnnConvolutionBackwardBias", {"hipdnnConvolutionBackwardBias", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardFilterAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionBackwardFilterAlgorithm", {"hipdnnFindConvolutionBackwardFilterAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionBackwardFilterAlgorithmEx", {"hipdnnFindConvolutionBackwardFilterAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithm", {"hipdnnGetConvolutionBackwardFilterAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithm_v7", {"hipdnnGetConvolutionBackwardFilterAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionBackwardFilterWorkspaceSize", {"hipdnnGetConvolutionBackwardFilterWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBackwardFilter", {"hipdnnConvolutionBackwardFilter", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardDataAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionBackwardDataAlgorithm", {"hipdnnFindConvolutionBackwardDataAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionBackwardDataAlgorithmEx", {"hipdnnFindConvolutionBackwardDataAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithm", {"hipdnnGetConvolutionBackwardDataAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithm_v7", {"hipdnnGetConvolutionBackwardDataAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionBackwardDataWorkspaceSize", {"hipdnnGetConvolutionBackwardDataWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBackwardData", {"hipdnnConvolutionBackwardData", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Sortmax functions - {"cudnnSoftmaxForward", 
{"hipdnnSoftmaxForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSoftmaxBackward", {"hipdnnSoftmaxBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Pooling functions - {"cudnnCreatePoolingDescriptor", {"hipdnnCreatePoolingDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPooling2dDescriptor", {"hipdnnSetPooling2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetPooling2dDescriptor", {"hipdnnGetPooling2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPoolingNdDescriptor", {"hipdnnSetPoolingNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetPoolingNdDescriptor", {"hipdnnGetPoolingNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetPoolingNdForwardOutputDim", {"hipdnnGetPoolingNdForwardOutputDim", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetPooling2dForwardOutputDim", {"hipdnnGetPooling2dForwardOutputDim", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyPoolingDescriptor", {"hipdnnDestroyPoolingDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnPoolingForward", {"hipdnnPoolingForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnPoolingBackward", {"hipdnnPoolingBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Activation functions - {"cudnnCreateActivationDescriptor", {"hipdnnCreateActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetActivationDescriptor", {"hipdnnSetActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetActivationDescriptor", {"hipdnnGetActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyActivationDescriptor", {"hipdnnDestroyActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnActivationForward", {"hipdnnActivationForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnActivationBackward", {"hipdnnActivationBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN LRN functions - {"cudnnCreateLRNDescriptor", {"hipdnnCreateLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetLRNDescriptor", {"hipdnnSetLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnGetLRNDescriptor", {"hipdnnGetLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyLRNDescriptor", {"hipdnnDestroyLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnLRNCrossChannelForward", {"hipdnnLRNCrossChannelForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnLRNCrossChannelBackward", {"hipdnnLRNCrossChannelBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Divisive Normalization functions - {"cudnnDivisiveNormalizationForward", {"hipdnnDivisiveNormalizationForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDivisiveNormalizationBackward", {"hipdnnDivisiveNormalizationBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Batch Normalization functions - {"cudnnDeriveBNTensorDescriptor", {"hipdnnDeriveBNTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationForwardTraining", {"hipdnnBatchNormalizationForwardTraining", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationForwardTrainingEx", {"hipdnnBatchNormalizationForwardTrainingEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnBatchNormalizationForwardInference", {"hipdnnBatchNormalizationForwardInference", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationBackward", {"hipdnnBatchNormalizationBackward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationBackwardEx", {"hipdnnBatchNormalizationBackwardEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", {"hipdnnGetBatchNormalizationForwardTrainingExWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationBackwardExWorkspaceSize", {"hipdnnGetBatchNormalizationBackwardExWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationTrainingExReserveSpaceSize", {"hipdnnGetBatchNormalizationTrainingExReserveSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Spatial Transformer functions - 
{"cudnnCreateSpatialTransformerDescriptor", {"hipdnnCreateSpatialTransformerDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetSpatialTransformerNdDescriptor", {"hipdnnSetSpatialTransformerNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroySpatialTransformerDescriptor", {"hipdnnDestroySpatialTransformerDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfGridGeneratorForward", {"hipdnnSpatialTfGridGeneratorForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfGridGeneratorBackward", {"hipdnnSpatialTfGridGeneratorBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfSamplerForward", {"hipdnnSpatialTfSamplerForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfSamplerBackward", {"hipdnnSpatialTfSamplerBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Dropout functions - {"cudnnCreateDropoutDescriptor", {"hipdnnCreateDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyDropoutDescriptor", {"hipdnnDestroyDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDropoutGetStatesSize", {"hipdnnDropoutGetStatesSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDropoutGetReserveSpaceSize", {"hipdnnDropoutGetReserveSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetDropoutDescriptor", {"hipdnnSetDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetDropoutDescriptor", {"hipdnnGetDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRestoreDropoutDescriptor", {"hipdnnRestoreDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutForward", {"hipdnnDropoutForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutBackward", {"hipdnnDropoutBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN RNN functions - {"cudnnCreateRNNDescriptor", {"hipdnnCreateRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnDestroyRNNDescriptor", {"hipdnnDestroyRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNForwardInferenceAlgorithmMaxCount", {"hipdnnGetRNNForwardInferenceAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNForwardInferenceAlgorithmEx", {"hipdnnFindRNNForwardInferenceAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNForwardTrainingAlgorithmMaxCount", {"hipdnnGetRNNForwardTrainingAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNForwardTrainingAlgorithmEx", {"hipdnnFindRNNForwardTrainingAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBackwardDataAlgorithmMaxCount", {"hipdnnGetRNNBackwardDataAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNBackwardDataAlgorithmEx", {"hipdnnFindRNNBackwardDataAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBackwardWeightsAlgorithmMaxCount", {"hipdnnGetRNNBackwardWeightsAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNBackwardWeightsAlgorithmEx", {"hipdnnFindRNNBackwardWeightsAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreatePersistentRNNPlan", {"hipdnnCreatePersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPersistentRNNPlan", {"hipdnnSetPersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyPersistentRNNPlan", {"hipdnnDestroyPersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - // NOTE" hipdnnSetRNNDescriptor has additional argument hipdnnRNNBiasMode_t *biasMode without default value - {"cudnnSetRNNDescriptor", {"hipdnnSetRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - // NOTE" hipdnnGetRNNDescriptor has additional argument hipdnnRNNBiasMode_t *biasMode without default value - {"cudnnGetRNNDescriptor", {"hipdnnGetRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNProjectionLayers", {"hipdnnSetRNNProjectionLayers", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnGetRNNProjectionLayers", {"hipdnnGetRNNProjectionLayers", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNAlgorithmDescriptor", {"hipdnnSetRNNAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNMatrixMathType", {"hipdnnSetRNNMatrixMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNMatrixMathType", {"hipdnnGetRNNMatrixMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNWorkspaceSize", {"hipdnnGetRNNWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNTrainingReserveSize", {"hipdnnGetRNNTrainingReserveSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNParamsSize", {"hipdnnGetRNNParamsSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNLinLayerMatrixParams", {"hipdnnGetRNNLinLayerMatrixParams", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNLinLayerBiasParams", {"hipdnnGetRNNLinLayerBiasParams", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardInference", {"hipdnnRNNForwardInference", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardInferenceEx", {"hipdnnRNNForwardInferenceEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNForwardTraining", {"hipdnnRNNForwardTraining", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardTrainingEx", {"hipdnnRNNForwardTrainingEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNBackwardData", {"hipdnnRNNBackwardData", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNBackwardDataEx", {"hipdnnRNNBackwardDataEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNBackwardWeights", {"hipdnnRNNBackwardWeights", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNBackwardWeightsEx", {"hipdnnRNNBackwardWeightsEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNDescriptor_v5", {"hipdnnSetRNNDescriptor_v5", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNDescriptor_v6", {"hipdnnSetRNNDescriptor_v6", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNPaddingMode", {"hipdnnSetRNNPaddingMode", "", 
CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNPaddingMode", {"hipdnnGetRNNPaddingMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateRNNDataDescriptor", {"hipdnnCreateRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyRNNDataDescriptor", {"hipdnnDestroyRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNDataDescriptor", {"hipdnnSetRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNDataDescriptor", {"hipdnnGetRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNBiasMode", {"hipdnnSetRNNBiasMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBiasMode", {"hipdnnGetRNNBiasMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Connectionist Temporal Classification loss functions - {"cudnnCreateCTCLossDescriptor", {"hipdnnCreateCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetCTCLossDescriptor", {"hipdnnSetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetCTCLossDescriptorEx", {"hipdnnSetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossDescriptor", {"hipdnnGetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossDescriptorEx", {"hipdnnGetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyCTCLossDescriptor", {"hipdnnDestroyCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCTCLoss", {"hipdnnCTCLoss", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossWorkspaceSize", {"hipdnnGetCTCLossWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Algorithm functions - {"cudnnCreateAlgorithmDescriptor", {"hipdnnCreateAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAlgorithmDescriptor", {"hipdnnSetAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmDescriptor", {"hipdnnGetAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCopyAlgorithmDescriptor", {"hipdnnCopyAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyAlgorithmDescriptor", {"hipdnnDestroyAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateAlgorithmPerformance", {"hipdnnCreateAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAlgorithmPerformance", {"hipdnnSetAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmPerformance", {"hipdnnGetAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyAlgorithmPerformance", {"hipdnnDestroyAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmSpaceSize", {"hipdnnGetAlgorithmSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSaveAlgorithm", {"hipdnnSaveAlgorithm", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRestoreAlgorithm", {"hipdnnRestoreAlgorithm", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Clipping functions - {"cudnnRNNSetClip", {"hipdnnRNNSetClip", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNGetClip", {"hipdnnRNNGetClip", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Sequence functions - {"cudnnCreateSeqDataDescriptor", {"hipdnnCreateSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroySeqDataDescriptor", {"hipdnnDestroySeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetSeqDataDescriptor", {"hipdnnSetSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetSeqDataDescriptor", {"hipdnnGetSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Multihead Attention functions - {"cudnnCreateAttnDescriptor", {"hipdnnCreateAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnDestroyAttnDescriptor", {"hipdnnDestroyAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAttnDescriptor", {"hipdnnSetAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAttnDescriptor", {"hipdnnGetAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetMultiHeadAttnBuffers", {"hipdnnGetMultiHeadAttnBuffers", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetMultiHeadAttnWeights", {"hipdnnGetMultiHeadAttnWeights", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnForward", {"hipdnnMultiHeadAttnForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnBackwardData", {"hipdnnMultiHeadAttnBackwardData", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnBackwardWeights", {"hipdnnMultiHeadAttnBackwardWeights", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Fuse functions - {"cudnnCreateFusedOpsConstParamPack", {"hipdnnCreateFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsConstParamPack", {"hipdnnDestroyFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetFusedOpsConstParamPackAttribute", {"hipdnnSetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetFusedOpsConstParamPackAttribute", {"hipdnnGetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateFusedOpsVariantParamPack", {"hipdnnCreateFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsVariantParamPack", {"hipdnnDestroyFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetFusedOpsVariantParamPackAttribute", {"hipdnnSetFusedOpsVariantParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetFusedOpsVariantParamPackAttribute", {"hipdnnGetFusedOpsVariantParamPackAttribute", "", 
CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateFusedOpsPlan", {"hipdnnCreateFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsPlan", {"hipdnnDestroyFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMakeFusedOpsPlan", {"hipdnnMakeFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsExecute", {"hipdnnFusedOpsExecute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp b/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp deleted file mode 100644 index cc372067b1..0000000000 --- a/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_DNN_TYPE_NAME_MAP{ - // cuDNN defines - {"CUDNN_VERSION", {"HIPDNN_VERSION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7000 - {"CUDNN_DIM_MAX", {"HIPDNN_DIM_MAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_LRN_MIN_N", {"HIPDNN_LRN_MIN_N", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_LRN_MAX_N", {"HIPDNN_LRN_MAX_N", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_LRN_MIN_K", {"HIPDNN_LRN_MIN_K", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1e-5 - {"CUDNN_LRN_MIN_BETA", {"HIPDNN_LRN_MIN_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0.01 - {"CUDNN_BN_MIN_EPSILON", {"HIPDNN_BN_MIN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1e-5 - {"CUDNN_SEV_ERROR_EN", {"HIPDNN_SEV_ERROR_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_WARNING_EN", {"HIPDNN_SEV_WARNING_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_INFO_EN", {"HIPDNN_SEV_INFO_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEQDATA_DIM_COUNT", {"HIPDNN_SEQDATA_DIM_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - - // cuDNN enums - {"cudnnStatus_t", {"hipdnnStatus_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_STATUS_ARCH_MISMATCH", 
{"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 9 - {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 10 - {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 11 - {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CROSS_CORRELATION", {"HIPDNN_CROSS_CORRELATION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnTensorFormat_t", {"hipdnnTensorFormat_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_TENSOR_NCHW", {"HIPDNN_TENSOR_NCHW", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_TENSOR_NHWC", {"HIPDNN_TENSOR_NHWC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_TENSOR_NCHW_VECT_C", {"HIPDNN_TENSOR_NCHW_VECT_C", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnFoldingDirection_t", {"hipdnnFoldingDirection_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_TRANSFORM_FOLD", {"HIPDNN_TRANSFORM_FOLD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0U - {"CUDNN_TRANSFORM_UNFOLD", {"HIPDNN_TRANSFORM_UNFOLD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U - {"cudnnDataType_t", 
{"hipdnnDataType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_DATA_FLOAT", {"HIPDNN_DATA_FLOAT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_DATA_DOUBLE", {"HIPDNN_DATA_DOUBLE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_DATA_HALF", {"HIPDNN_DATA_HALF", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_DATA_INT8", {"HIPDNN_DATA_INT8", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_DATA_INT32", {"HIPDNN_DATA_INT32", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_DATA_INT8x4", {"HIPDNN_DATA_INT8x4", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_DATA_INT8x32", {"HIPDNN_DATA_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_ERRQUERY_BLOCKING", {"HIPDNN_ERRQUERY_BLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnSeverity_t", {"hipdnnSeverity_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_FATAL", {"HIPDNN_SEV_FATAL", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_SEV_ERROR", {"HIPDNN_SEV_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_SEV_WARNING", {"HIPDNN_SEV_WARNING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_SEV_INFO", {"HIPDNN_SEV_INFO", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", 
{"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_FWD_ALGO_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", {"HIPDNN_CONVOLUTION_FWD_ALGO_DIRECT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_FWD_ALGO_FFT", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_CONVOLUTION_FWD_ALGO_COUNT", {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnConvolutionFwdPreference_t", {"hipdnnConvolutionFwdPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnDeterminism_t", {"hipdnnDeterminism_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_NON_DETERMINISTIC", {"HIPDNN_NON_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_DETERMINISTIC", {"HIPDNN_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnDivNormMode_t", {"hipdnnDivNormMode_t", "", CONV_TYPE, API_DNN, 
HIP_UNSUPPORTED}}, - {"CUDNN_DIVNORM_PRECOMPUTED_MEANS", {"HIPDNN_DIVNORM_PRECOMPUTED_MEANS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"cudnnCTCLossAlgo_t", {"hipdnnCTCLossAlgo_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnLRNMode_t", {"hipdnnLRNMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_LRN_CROSS_CHANNEL_DIM1", {"HIPDNN_LRN_CROSS_CHANNEL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 vs 1 - {"cudnnRNNInputMode_t", {"hipdnnRNNInputMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_LINEAR_INPUT", {"HIPDNN_LINEAR_INPUT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SKIP_INPUT", {"HIPDNN_SKIP_INPUT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnDirectionMode_t", {"hipdnnDirectionMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_UNIDIRECTIONAL", {"HIPDNN_UNIDIRECTIONAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_BIDIRECTIONAL", {"HIPDNN_BIDIRECTIONAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnMathType_t", {"hipdnnMathType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_DEFAULT_MATH", {"HIPDNN_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_TENSOR_OP_MATH", {"HIPDNN_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", {"HIPDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnNanPropagation_t", {"hipdnnNanPropagation_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_NOT_PROPAGATE_NAN", {"HIPDNN_NOT_PROPAGATE_NAN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_PROPAGATE_NAN", {"HIPDNN_PROPAGATE_NAN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataAlgo_t", {"hipdnnConvolutionBwdDataAlgo_t", "", CONV_TYPE, 
API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"cudnnConvolutionBwdFilterAlgo_t", {"hipdnnConvolutionBwdFilterAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_3", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - 
{"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"cudnnConvolutionBwdFilterPreference_t", {"hipdnnConvolutionBwdFilterPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",{"HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT","", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnRNNAlgo_t", {"hipdnnRNNAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_ALGO_STANDARD", {"HIPDNN_RNN_ALGO_STANDARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_ALGO_PERSIST_STATIC", {"HIPDNN_RNN_ALGO_PERSIST_STATIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_ALGO_PERSIST_DYNAMIC", {"HIPDNN_RNN_ALGO_PERSIST_DYNAMIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_RNN_ALGO_COUNT", {"HIPDNN_RNN_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnRNNMode_t", {"hipdnnRNNMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_RELU", {"HIPDNN_RNN_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_TANH", {"HIPDNN_RNN_TANH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_LSTM", {"HIPDNN_LSTM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_GRU", {"HIPDNN_GRU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnRNNBiasMode_t", {"hipdnnRNNBiasMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_NO_BIAS", {"HIPDNN_RNN_NO_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_SINGLE_INP_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_DOUBLE_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_SINGLE_REC_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", 
CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnOpTensorOp_t", {"hipdnnOpTensorOp_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_OP_TENSOR_ADD", {"HIPDNN_OP_TENSOR_ADD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_OP_TENSOR_MUL", {"HIPDNN_OP_TENSOR_MUL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_OP_TENSOR_MIN", {"HIPDNN_OP_TENSOR_MIN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_OP_TENSOR_MAX", {"HIPDNN_OP_TENSOR_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_OP_TENSOR_SQRT", {"HIPDNN_OP_TENSOR_SQRT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_OP_TENSOR_NOT", {"HIPDNN_OP_TENSOR_NOT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"cudnnReduceTensorOp_t", {"hipdnnReduceTensorOp_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_REDUCE_TENSOR_ADD", {"HIPDNN_REDUCE_TENSOR_ADD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_REDUCE_TENSOR_MUL", {"HIPDNN_REDUCE_TENSOR_MUL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_REDUCE_TENSOR_MIN", {"HIPDNN_REDUCE_TENSOR_MIN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_REDUCE_TENSOR_MAX", {"HIPDNN_REDUCE_TENSOR_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_REDUCE_TENSOR_AMAX", {"HIPDNN_REDUCE_TENSOR_AMAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_REDUCE_TENSOR_AVG", {"HIPDNN_REDUCE_TENSOR_AVG", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_REDUCE_TENSOR_NORM1", {"HIPDNN_REDUCE_TENSOR_NORM1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_REDUCE_TENSOR_NORM2", {"HIPDNN_REDUCE_TENSOR_NORM2", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", {"HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnReduceTensorIndices_t", {"hipdnnReduceTensorIndices_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_REDUCE_TENSOR_NO_INDICES", {"HIPDNN_REDUCE_TENSOR_NO_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", 
"", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataPreference_t", {"hipdnnConvolutionBwdDataPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnIndicesType_t", {"hipdnnIndicesType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_32BIT_INDICES", {"HIPDNN_32BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_64BIT_INDICES", {"HIPDNN_64BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_16BIT_INDICES", {"HIPDNN_16BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_8BIT_INDICES", {"HIPDNN_8BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnSoftmaxAlgorithm_t", {"hipdnnSoftmaxAlgorithm_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_SOFTMAX_FAST", {"HIPDNN_SOFTMAX_FAST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SOFTMAX_ACCURATE", {"HIPDNN_SOFTMAX_ACCURATE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_SOFTMAX_LOG", {"HIPDNN_SOFTMAX_LOG", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnSoftmaxMode_t", {"hipdnnSoftmaxMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_SOFTMAX_MODE_INSTANCE", {"HIPDNN_SOFTMAX_MODE_INSTANCE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SOFTMAX_MODE_CHANNEL", {"HIPDNN_SOFTMAX_MODE_CHANNEL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnPoolingMode_t", {"hipdnnPoolingMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_POOLING_MAX", {"HIPDNN_POOLING_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - 
{"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_POOLING_MAX_DETERMINISTIC", {"HIPDNN_POOLING_MAX_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnActivationMode_t", {"hipdnnActivationMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_ACTIVATION_SIGMOID", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_ACTIVATION_RELU", {"HIPDNN_ACTIVATION_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_ACTIVATION_TANH", {"HIPDNN_ACTIVATION_TANH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_ACTIVATION_CLIPPED_RELU", {"HIPDNN_ACTIVATION_CLIPPED_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_ACTIVATION_ELU", {"HIPDNN_ACTIVATION_ELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_ACTIVATION_IDENTITY", {"HIPDNN_ACTIVATION_PATHTRU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"cudnnBatchNormMode_t", {"hipdnnBatchNormMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_BATCHNORM_PER_ACTIVATION", {"HIPDNN_BATCHNORM_PER_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_BATCHNORM_SPATIAL", {"HIPDNN_BATCHNORM_SPATIAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_BATCHNORM_SPATIAL_PERSISTENT", {"HIPDNN_BATCHNORM_SPATIAL_PERSISTENT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnSamplerType_t", {"hipdnnSamplerType_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SAMPLER_BILINEAR", {"HIPDNN_SAMPLER_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"cudnnBatchNormOps_t", {"hipdnnBatchNormOps_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_BATCHNORM_OPS_BN", {"HIPDNN_BATCHNORM_OPS_BN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_BATCHNORM_OPS_BN_ACTIVATION", {"HIPDNN_BATCHNORM_OPS_BN_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", 
{"HIPDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnRNNClipMode_t", {"hipdnnRNNClipMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_CLIP_NONE", {"HIPDNN_RNN_CLIP_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_CLIP_MINMAX", {"HIPDNN_RNN_CLIP_MINMAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnRNNDataLayout_t", {"hipdnnRNNDataLayout_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnRNNPaddingMode_t", {"hipdnnRNNPaddingMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_PADDED_IO_DISABLED", {"HIPDNN_RNN_PADDED_IO_DISABLED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_PADDED_IO_ENABLED", {"HIPDNN_RNN_PADDED_IO_ENABLED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnSeqDataAxis_t", {"hipdnnSeqDataAxis_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEQDATA_TIME_DIM", {"HIPDNN_SEQDATA_TIME_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_SEQDATA_BATCH_DIM", {"HIPDNN_SEQDATA_BATCH_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_SEQDATA_BEAM_DIM", {"HIPDNN_SEQDATA_BEAM_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_SEQDATA_VECT_DIM", {"HIPDNN_SEQDATA_VECT_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnAttnQueryMap_t", {"hipdnnAttnQueryMap_t", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - 
{"CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", {"HIPDNN_ATTN_QUERYMAP_ALL_TO_ONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", {"HIPDNN_ATTN_QUERYMAP_ONE_TO_ONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U << 0 - {"CUDNN_ATTN_DISABLE_PROJ_BIASES", {"HIPDNN_ATTN_DISABLE_PROJ_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ATTN_ENABLE_PROJ_BIASES", {"HIPDNN_ATTN_ENABLE_PROJ_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U << 1 - {"cudnnMultiHeadAttnWeightKind_t", {"hipdnnMultiHeadAttnWeightKind_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_MH_ATTN_Q_WEIGHTS", {"HIPDNN_MH_ATTN_Q_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_MH_ATTN_K_WEIGHTS", {"HIPDNN_MH_ATTN_K_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_MH_ATTN_V_WEIGHTS", {"HIPDNN_MH_ATTN_V_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_MH_ATTN_O_WEIGHTS", {"HIPDNN_MH_ATTN_O_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_MH_ATTN_Q_BIASES", {"HIPDNN_MH_ATTN_Q_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_MH_ATTN_K_BIASES", {"HIPDNN_MH_ATTN_K_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_MH_ATTN_V_BIASES", {"HIPDNN_MH_ATTN_V_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_MH_ATTN_O_BIASES", {"HIPDNN_MH_ATTN_O_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_ATTN_WKIND_COUNT", {"HIPDNN_ATTN_WKIND_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"cudnnWgradMode_t", {"hipdnnWgradMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_WGRAD_MODE_ADD", {"HIPDNN_WGRAD_MODE_ADD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_WGRAD_MODE_SET", {"HIPDNN_WGRAD_MODE_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 
HIP_UNSUPPORTED}}, // 1 - {"cudnnReorderType_t", {"hipdnnReorderType_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_DEFAULT_REORDER", {"HIPDNN_DEFAULT_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_NO_REORDER", {"HIPDNN_NO_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnLossNormalizationMode_t", {"hipdnnLossNormalizationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_LOSS_NORMALIZATION_NONE", {"HIPDNN_LOSS_NORMALIZATION_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_LOSS_NORMALIZATION_SOFTMAX", {"HIPDNN_LOSS_NORMALIZATION_SOFTMAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnFusedOps_t", {"hipdnnFusedOps_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", {"HIPDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", {"HIPDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", {"HIPDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"cudnnFusedOpsConstParamLabel_t", {"hipdnnFusedOpsConstParamLabel_t", "", CONV_TYPE, 
API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PARAM_XDESC", {"HIPDNN_PARAM_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PARAM_XDATA_PLACEHOLDER", {"HIPDNN_PARAM_XDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PARAM_BN_MODE", {"HIPDNN_PARAM_BN_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_PARAM_BN_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_PARAM_ACTIVATION_DESC", {"HIPDNN_PARAM_ACTIVATION_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_PARAM_CONV_DESC", {"HIPDNN_PARAM_CONV_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_PARAM_WDESC", {"HIPDNN_PARAM_WDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_PARAM_WDATA_PLACEHOLDER", {"HIPDNN_PARAM_WDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 - {"CUDNN_PARAM_DWDESC", {"HIPDNN_PARAM_DWDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 - {"CUDNN_PARAM_DWDATA_PLACEHOLDER", {"HIPDNN_PARAM_DWDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 - {"CUDNN_PARAM_YDESC", {"HIPDNN_PARAM_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_PARAM_YDATA_PLACEHOLDER", {"HIPDNN_PARAM_YDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_PARAM_DYDESC", {"HIPDNN_PARAM_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 - {"CUDNN_PARAM_DYDATA_PLACEHOLDER", {"HIPDNN_PARAM_DYDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 - 
{"CUDNN_PARAM_YSTATS_DESC", {"HIPDNN_PARAM_YSTATS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_PARAM_YSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 - {"CUDNN_PARAM_YSQSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSQSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 - {"CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", {"HIPDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 - {"CUDNN_PARAM_BN_SCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_SCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 - {"CUDNN_PARAM_BN_BIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_BIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 - {"CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 - {"CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 - {"CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 24 - {"CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 25 - {"CUDNN_PARAM_ZDESC", {"HIPDNN_PARAM_ZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 26 - {"CUDNN_PARAM_ZDATA_PLACEHOLDER", {"HIPDNN_PARAM_ZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 27 - {"CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 28 - {"CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 29 - {"CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", 
{"HIPDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 30 - {"CUDNN_PARAM_ACTIVATION_BITMASK_DESC", {"HIPDNN_PARAM_ACTIVATION_BITMASK_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 31 - {"CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", {"HIPDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 32 - {"CUDNN_PARAM_DXDESC", {"HIPDNN_PARAM_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 33 - {"CUDNN_PARAM_DXDATA_PLACEHOLDER", {"HIPDNN_PARAM_DXDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 34 - {"CUDNN_PARAM_DZDESC", {"HIPDNN_PARAM_DZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 35 - {"CUDNN_PARAM_DZDATA_PLACEHOLDER", {"HIPDNN_PARAM_DZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 36 - {"CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_DSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 37 - {"CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_DBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 38 - {"cudnnFusedOpsPointerPlaceHolder_t", {"hipdnnActivationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PTR_NULL", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PTR_ELEM_ALIGNED", {"HIPDNN_ACTIVATION_RELU", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PTR_16B_ALIGNED", {"HIPDNN_ACTIVATION_TANH", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnFusedOpsVariantParamLabel_t", {"hipdnnFusedOpsVariantParamLabel_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PTR_XDATA", {"HIPDNN_PTR_XDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PTR_BN_EQSCALE", {"HIPDNN_PTR_BN_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PTR_BN_EQBIAS", {"HIPDNN_PTR_BN_EQBIAS", "", 
CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_PTR_WDATA", {"HIPDNN_PTR_WDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_PTR_DWDATA", {"HIPDNN_PTR_DWDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_PTR_YDATA", {"HIPDNN_PTR_YDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_PTR_DYDATA", {"HIPDNN_PTR_DYDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_PTR_YSUM", {"HIPDNN_PTR_YSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_PTR_YSQSUM", {"HIPDNN_PTR_YSQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_PTR_WORKSPACE", {"HIPDNN_PTR_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 - {"CUDNN_PTR_BN_SCALE", {"HIPDNN_PTR_BN_SCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 - {"CUDNN_PTR_BN_BIAS", {"HIPDNN_PTR_BN_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 - {"CUDNN_PTR_BN_SAVED_MEAN", {"HIPDNN_PTR_BN_SAVED_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_PTR_BN_SAVED_INVSTD", {"HIPDNN_PTR_BN_SAVED_INVSTD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_PTR_BN_RUNNING_MEAN", {"HIPDNN_PTR_BN_RUNNING_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 - {"CUDNN_PTR_BN_RUNNING_VAR", {"HIPDNN_PTR_BN_RUNNING_VAR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 - {"CUDNN_PTR_ZDATA", {"HIPDNN_PTR_ZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_PTR_BN_Z_EQSCALE", {"HIPDNN_PTR_BN_Z_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 - {"CUDNN_PTR_BN_Z_EQBIAS", {"HIPDNN_PTR_BN_Z_EQBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 - {"CUDNN_PTR_ACTIVATION_BITMASK", {"HIPDNN_PTR_ACTIVATION_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 - {"CUDNN_PTR_DXDATA", {"HIPDNN_PTR_DXDATA", "", 
CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 - {"CUDNN_PTR_DZDATA", {"HIPDNN_PTR_DZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 - {"CUDNN_PTR_BN_DSCALE", {"HIPDNN_PTR_BN_DSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 - {"CUDNN_PTR_BN_DBIAS", {"HIPDNN_PTR_BN_DBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 - {"CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", {"HIPDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 100 - {"CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", {"HIPDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 101 - {"CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", {"HIPDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 102 - {"CUDNN_SCALAR_DOUBLE_BN_EPSILON", {"HIPDNN_SCALAR_DOUBLE_BN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 103 - - // cuDNN types - {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnHandle_t", {"hipdnnHandle_t", "", CONV_TYPE, API_DNN}}, - {"cudnnTensorStruct", {"hipdnnTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorDescriptor_t", {"hipdnnTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionStruct", {"hipdnnConvolutionStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionDescriptor_t", {"hipdnnConvolutionDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnPoolingStruct", {"hipdnnPoolingStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnPoolingDescriptor_t", {"hipdnnPoolingDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnFilterStruct", {"hipdnnFilterStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnLRNStruct", {"hipdnnLRNStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnLRNDescriptor_t", 
{"hipdnnLRNDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnActivationStruct", {"hipdnnActivationStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnActivationDescriptor_t", {"hipdnnActivationDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnSpatialTransformerStruct", {"hipdnnSpatialTransformerStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTransformerDescriptor_t", {"hipdnnSpatialTransformerDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnOpTensorStruct", {"hipdnnOpTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnOpTensorDescriptor_t", {"hipdnnOpTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnReduceTensorStruct", {"hipdnnReduceTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnReduceTensorDescriptor_t", {"hipdnnReduceTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnCTCLossStruct", {"hipdnnCTCLossStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCTCLossDescriptor_t", {"hipdnnCTCLossDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorTransformStruct", {"hipdnnTensorTransformStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorTransformDescriptor_t", {"hipdnnTensorTransformDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionFwdAlgoPerf_t", {"hipdnnConvolutionFwdAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionBwdFilterAlgoPerf_t", {"hipdnnConvolutionBwdFilterAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionBwdDataAlgoPerf_t", {"hipdnnConvolutionBwdDataAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnDropoutStruct", {"hipdnnDropoutStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutDescriptor_t", {"hipdnnDropoutDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnAlgorithmStruct", {"hipdnnAlgorithmStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAlgorithmDescriptor_t", {"hipdnnAlgorithmDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - 
{"cudnnAlgorithmPerformanceStruct", {"hipdnnAlgorithmPerformanceStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAlgorithmPerformance_t", {"hipdnnAlgorithmPerformance_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNStruct", {"hipdnnRNNStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDescriptor_t", {"hipdnnRNNDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnPersistentRNNPlan", {"hipdnnPersistentRNNPlan", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnPersistentRNNPlan_t", {"hipdnnPersistentRNNPlan_t", "", CONV_TYPE, API_DNN}}, - {"cudnnAlgorithm_t", {"hipdnnAlgorithm_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDebug_t", {"hipdnnDebug_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCallback_t", {"hipdnnCallback_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDataStruct", {"hipdnnRNNDataStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDataDescriptor_t", {"hipdnnRNNDataDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSeqDataStruct", {"hipdnnSeqDataStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSeqDataDescriptor_t", {"hipdnnSeqDataDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAttnStruct", {"hipdnnAttnStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAttnDescriptor_t", {"hipdnnAttnDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsConstParamStruct", {"hipdnnFusedOpsConstParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsConstParamPack_t", {"hipdnnFusedOpsConstParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsVariantParamStruct", {"hipdnnFusedOpsVariantParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsVariantParamPack_t", {"hipdnnFusedOpsVariantParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsPlanStruct", {"hipdnnFusedOpsPlanStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsPlan_t", 
{"hipdnnFusedOpsPlan_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Device_functions.cpp b/hipify-clang/src/CUDA2HIP_Device_functions.cpp deleted file mode 100644 index 87fe2ac86c..0000000000 --- a/hipify-clang/src/CUDA2HIP_Device_functions.cpp +++ /dev/null @@ -1,616 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Maps CUDA header names to HIP header names -const std::map CUDA_DEVICE_FUNC_MAP{ - // math functions - {"abs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"labs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llabs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fabs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fabsf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"min", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fminf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"max", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmaxf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rsqrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expm1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log1p", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log1pf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"floor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cosh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinh", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME}}, - {"tanh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acosh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acoshf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ldexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ldexpf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logb", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logbf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ilogb", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ilogbf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbln", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalblnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"frexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"frexpf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"round", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"roundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lround", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lroundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llround", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llroundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lrintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llrintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nearbyint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nearbyintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"trunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"truncf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"fdim", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdimf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hypot", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rhypot", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hypotf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rhypotf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm3d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm3d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm4d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm4d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnormf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm3df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm3df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm4df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm4df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cbrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cbrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rcbrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rcbrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinpi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinpif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cospi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cospif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincospi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincospif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"pow", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"modf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmod", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remainder", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remainderf", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remquo", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remquof", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"jn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"jnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"yn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ynf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lgamma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdfinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdfinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcx", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcxf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lgammaf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tgamma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tgammaf", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME}}, - {"copysign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"copysignf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nextafter", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nextafterf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmaf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"coshf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tanhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"modff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"powf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sqrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ceilf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"floorf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmodf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isfinite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"llmax", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // static math functions declared in device-functions.h - {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // device functions - {"__mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__syncthreads", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__threadfence", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__threadfence_block", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__saturatef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sad", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__usad", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdividef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdividef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__cosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__tanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sincosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__expf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__exp10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__log2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__log10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__logf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__powf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"__float2int_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, 
- {"__fadd_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frsqrt_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__clz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ffs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__popc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__brev", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__clzll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ffsll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__popcll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__brevll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__byte_perm", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__rhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__urhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // SIMD functions - {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // fp16 functions - {"__float2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__floats2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float22half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half22float2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, 
- {"__half2short_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rn", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"htrunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hfloor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2trunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2ceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2floor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__lowhigh2highlow", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__lows2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__highs2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__halves2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half_as_short", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half_as_ushort", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldcg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldca", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldcs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__heq2", 
{"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hle2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hlt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisnan2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__h2div", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hdiv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__habs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__habs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__hbeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"__hbne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hble2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hblt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__heq", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hne", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hle", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hge", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hlt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hequ", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hleu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgeu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hltu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgtu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hcos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hsin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2sqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rsqrt", {"", 
"", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2cos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2sin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // atomic functions - {"atomicAdd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicSub", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicExch", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicMin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicMax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicInc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicDec", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicAnd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicOr", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicXor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicCAS", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp deleted file mode 100644 index 7be0fd0f3d..0000000000 --- a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ /dev/null @@ -1,815 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all CUDA Driver API functions -const std::map CUDA_DRIVER_FUNCTION_MAP{ - // 5.2. Error Handling - // no analogue - // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cuGetErrorName", {"hipGetErrorName_", "", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cuGetErrorString", {"hipGetErrorString_", "", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.3. Initialization - // no analogue - {"cuInit", {"hipInit", "", CONV_INIT, API_DRIVER}}, - - // 5.4 Version Management - // cudaDriverGetVersion - {"cuDriverGetVersion", {"hipDriverGetVersion", "", CONV_VERSION, API_DRIVER}}, - - // 5.5. 
Device Management - // cudaGetDevice - // NOTE: cudaGetDevice has additional attr: int ordinal - {"cuDeviceGet", {"hipGetDevice", "", CONV_DEVICE, API_DRIVER}}, - // cudaDeviceGetAttribute - {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", "", CONV_DEVICE, API_DRIVER}}, - // cudaGetDeviceCount - {"cuDeviceGetCount", {"hipGetDeviceCount", "", CONV_DEVICE, API_DRIVER}}, - // no analogue - {"cuDeviceGetLuid", {"hipDeviceGetLuid", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceGetName", {"hipDeviceGetName", "", CONV_DEVICE, API_DRIVER}}, - // cudaDeviceGetNvSciSyncAttributes - {"cuDeviceGetNvSciSyncAttributes", {"hipDeviceGetNvSciSyncAttributes", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceGetUuid", {"hipDeviceGetUuid", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceTotalMem", {"hipDeviceTotalMem", "", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", "", CONV_DEVICE, API_DRIVER}}, - - // 5.6. Device Management [DEPRECATED] - {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", "", CONV_DEVICE, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaGetDeviceProperties due to different attributes: cudaDeviceProp and CUdevprop - {"cuDeviceGetProperties", {"hipGetDeviceProperties_", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.7. Primary Context Management - // no analogues - {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", "", CONV_CONTEXT, API_DRIVER}}, - - // 5.8. 
Context Management - // no analogues, except a few - {"cuCtxCreate", {"hipCtxCreate", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxCreate_v2", {"hipCtxCreate", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy_v2", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCurrent", {"hipCtxGetCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetDevice", {"hipCtxGetDevice", "", CONV_CONTEXT, API_DRIVER}}, - // cudaGetDeviceFlags - // TODO: rename to hipGetDeviceFlags - {"cuCtxGetFlags", {"hipCtxGetFlags", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetLimit - {"cuCtxGetLimit", {"hipDeviceGetLimit", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetSharedMemConfig - // TODO: rename to hipDeviceGetSharedMemConfig - {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetStreamPriorityRange - {"cuCtxGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPopCurrent", {"hipCtxPopCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent", {"hipCtxPushCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCurrent", {"hipCtxSetCurrent", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSetLimit - {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSetSharedMemConfig - // TODO: rename to hipDeviceSetSharedMemConfig - {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSynchronize - // TODO: rename to hipDeviceSynchronize - 
{"cuCtxSynchronize", {"hipCtxSynchronize", "", CONV_CONTEXT, API_DRIVER}}, - - // 5.9. Context Management [DEPRECATED] - // no analogues - {"cuCtxAttach", {"hipCtxAttach", "", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxDetach", {"hipCtxDetach", "", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.10. Module Management - // no analogues - {"cuLinkAddData", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddData_v2", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile_v2", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkComplete", {"hipLinkComplete", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate_v2", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkDestroy", {"hipLinkDestroy", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetTexRef", {"hipModuleGetTexRef", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoad", {"hipModuleLoad", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadData", {"hipModuleLoadData", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleUnload", {"hipModuleUnload", "", CONV_MODULE, API_DRIVER}}, - - // 5.11. 
Memory Management - // no analogue - {"cuArray3DCreate", {"hipArray3DCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DCreate_v2", {"hipArray3DCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArray3DGetDescriptor_v2", {"hipArray3DGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayCreate", {"hipArrayCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArrayCreate_v2", {"hipArrayCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArrayDestroy", {"hipArrayDestroy", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor_v2", {"hipArrayGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDeviceGetByPCIBusId - {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", "", CONV_MEMORY, API_DRIVER}}, - // cudaDeviceGetPCIBusId - {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcCloseMemHandle - {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcGetEventHandle - {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaIpcGetMemHandle - {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcOpenEventHandle - {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaIpcOpenMemHandle - {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaMalloc - {"cuMemAlloc", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAlloc_v2", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostAlloc - {"cuMemAllocHost", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocHost_v2", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaMallocManaged - {"cuMemAllocManaged", 
{"hipMallocManaged", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMallocPitch due to different signatures - {"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpy due to different signatures - {"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy2D due to different signatures - {"cuMemcpy2D", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpy2D_v2", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpy2DAsync/hipMemcpy2DAsync due to different signatures - {"cuMemcpy2DAsync", {"hipMemcpyParam2DAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpy2DAsync_v2", {"hipMemcpyParam2DAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy2DUnaligned_v2", {"hipMemcpy2DUnaligned", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3D due to different signatures - {"cuMemcpy3D", {"hipMemcpy3D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3D_v2", {"hipMemcpy3D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DAsync due to different signatures - {"cuMemcpy3DAsync", {"hipMemcpy3DAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3DAsync_v2", {"hipMemcpy3DAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DPeer due to different signatures - {"cuMemcpy3DPeer", {"hipMemcpy3DPeer_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DPeerAsync due to different signatures - {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync_", "", 
CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyAsync due to different signatures - {"cuMemcpyAsync", {"hipMemcpyAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyArrayToArray due to different signatures - {"cuMemcpyAtoA", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyAtoD", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyAtoH", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyAtoH_v2", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyDtoA", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyDtoD", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoDAsync", {"hipMemcpyDtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoH", {"hipMemcpyDtoH", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoHAsync", {"hipMemcpyDtoHAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoA", {"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoA_v2", 
{"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyHtoD", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoDAsync", {"hipMemcpyHtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpyPeer due to different signatures - {"cuMemcpyPeer", {"hipMemcpyPeer_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyPeerAsync due to different signatures - {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaFree - {"cuMemFree", {"hipFree", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemFree_v2", {"hipFree", "", CONV_MEMORY, API_DRIVER}}, - // cudaFreeHost - {"cuMemFreeHost", {"hipHostFree", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemGetAddressRange", {"hipMemGetAddressRange", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetAddressRange_v2", {"hipMemGetAddressRange", "", CONV_MEMORY, API_DRIVER}}, - // cudaMemGetInfo - {"cuMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetInfo_v2", {"hipMemGetInfo", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostAlloc - {"cuMemHostAlloc", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostGetDevicePointer - {"cuMemHostGetDevicePointer", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemHostGetDevicePointer_v2", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostGetFlags - {"cuMemHostGetFlags", {"hipHostGetFlags", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostRegister - {"cuMemHostRegister", {"hipHostRegister", "", CONV_MEMORY, 
API_DRIVER}}, - {"cuMemHostRegister_v2", {"hipHostRegister", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostUnregister - {"cuMemHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD2D16", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16_v2", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D32", {"hipMemsetD2D32", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32_v2", {"hipMemsetD2D32", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D8", {"hipMemsetD2D8", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8_v2", {"hipMemsetD2D8", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemset - {"cuMemsetD32", {"hipMemsetD32", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD32_v2", {"hipMemsetD32", "", CONV_MEMORY, API_DRIVER}}, - // cudaMemsetAsync - {"cuMemsetD32Async", {"hipMemsetD32Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD8", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD8_v2", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", 
CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.12. Virtual Memory Management - // no analogue - {"cuMemAddressFree", {"hipMemAddressFree", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAddressReserve", {"hipMemAddressReserve", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemCreate", {"hipMemCreate", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemExportToShareableHandle", {"hipMemExportToShareableHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAccess", {"hipMemGetAccess", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAllocationGranularity", {"hipMemGetAllocationGranularity", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAllocationPropertiesFromHandle", {"hipMemGetAllocationPropertiesFromHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemImportFromShareableHandle", {"hipMemImportFromShareableHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemMap", {"hipMemMap", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemRelease", {"hipMemRelease", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemSetAccess", {"hipMemSetAccess", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemUnmap", {"hipMemUnmap", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.13. 
Unified Addressing - // cudaMemAdvise - {"cuMemAdvise", {"hipMemAdvise", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // TODO: double check cudaMemPrefetchAsync - {"cuMemPrefetchAsync", {"hipMemPrefetchAsync_", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemRangeGetAttribute - {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemRangeGetAttributes - {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuPointerGetAttribute", {"hipPointerGetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaPointerGetAttributes due to different signatures - {"cuPointerGetAttributes", {"hipPointerGetAttributes_", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuPointerSetAttribute", {"hipPointerSetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.14. 
Stream Management - // cudaStreamAddCallback - {"cuStreamAddCallback", {"hipStreamAddCallback", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamAttachMemAsync - {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamBeginCapture - {"cuStreamBeginCapture", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamBeginCapture_v2", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamBeginCapture_ptsz", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamCreateWithFlags - {"cuStreamCreate", {"hipStreamCreateWithFlags", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamCreateWithPriority - {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamDestroy - {"cuStreamDestroy", {"hipStreamDestroy", "", CONV_STREAM, API_DRIVER}}, - {"cuStreamDestroy_v2", {"hipStreamDestroy", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamEndCapture - {"cuStreamEndCapture", {"hipStreamEndCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamGetCaptureInfo - {"cuStreamGetCaptureInfo", {"hipStreamGetCaptureInfo", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuStreamGetCtx", {"hipStreamGetContext", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamGetFlags - {"cuStreamGetFlags", {"hipStreamGetFlags", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamGetPriority - {"cuStreamGetPriority", {"hipStreamGetPriority", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamIsCapturing - {"cuStreamIsCapturing", {"hipStreamIsCapturing", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamQuery - {"cuStreamQuery", {"hipStreamQuery", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamSynchronize - {"cuStreamSynchronize", {"hipStreamSynchronize", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamWaitEvent - {"cuStreamWaitEvent", {"hipStreamWaitEvent", "", 
CONV_STREAM, API_DRIVER}}, - // cudaThreadExchangeStreamCaptureMode - {"cuThreadExchangeStreamCaptureMode", {"hipThreadExchangeStreamCaptureMode", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.15. Event Management - // cudaEventCreateWithFlags - {"cuEventCreate", {"hipEventCreateWithFlags", "", CONV_EVENT, API_DRIVER}}, - // cudaEventDestroy - {"cuEventDestroy", {"hipEventDestroy", "", CONV_EVENT, API_DRIVER}}, - {"cuEventDestroy_v2", {"hipEventDestroy", "", CONV_EVENT, API_DRIVER}}, - // cudaEventElapsedTime - {"cuEventElapsedTime", {"hipEventElapsedTime", "", CONV_EVENT, API_DRIVER}}, - // cudaEventQuery - {"cuEventQuery", {"hipEventQuery", "", CONV_EVENT, API_DRIVER}}, - // cudaEventRecord - {"cuEventRecord", {"hipEventRecord", "", CONV_EVENT, API_DRIVER}}, - // cudaEventSynchronize - {"cuEventSynchronize", {"hipEventSynchronize", "", CONV_EVENT, API_DRIVER}}, - - // 5.16. External Resource Interoperability - // cudaDestroyExternalMemory - {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroyExternalSemaphore - {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemoryGetMappedBuffer - {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemoryGetMappedMipmappedArray - {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaImportExternalMemory - {"cuImportExternalMemory", {"hipImportExternalMemory", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaImportExternalSemaphore - {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaSignalExternalSemaphoresAsync - {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", "", CONV_EXT_RES, API_DRIVER, 
HIP_UNSUPPORTED}}, - // cudaWaitExternalSemaphoresAsync - {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.17. Stream Memory Operations - // no analogues - {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWaitValue32", {"hipStreamWaitValue32", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWaitValue64", {"hipStreamWaitValue64", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWriteValue32", {"hipStreamWriteValue32", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWriteValue64", {"hipStreamWriteValue64", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.18.Execution Control - // no analogue - {"cuFuncGetAttribute", {"hipFuncGetAttribute", "", CONV_EXECUTION, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaFuncSetAttribute due to different signatures - {"cuFuncSetAttribute", {"hipFuncSetAttribute", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures - {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFuncSetSharedMemConfig due to different signatures - {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchCooperativeKernel due to different signatures - {"cuLaunchCooperativeKernel", {"hipLaunchCooperativeKernel_", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchCooperativeKernelMultiDevice due to different signatures - {"cuLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice_", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaLaunchHostFunc - {"cuLaunchHostFunc", 
{"hipLaunchHostFunc", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchKernel due to different signatures - {"cuLaunchKernel", {"hipModuleLaunchKernel", "", CONV_EXECUTION, API_DRIVER}}, - - // 5.19.Execution Control [DEPRECATED] - // no analogue - {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunch due to different signatures - {"cuLaunch", {"hipLaunch", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuLaunchGrid", {"hipLaunchGrid", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuLaunchGridAsync", {"hipLaunchGridAsync", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetf", {"hipParamSetf", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSeti", {"hipParamSeti", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetSize", {"hipParamSetSize", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetTexRef", {"hipParamSetTexRef", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetv", {"hipParamSetv", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.20. 
Graph Management - // cudaGraphAddChildGraphNode - {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddDependencies - {"cuGraphAddDependencies", {"hipGraphAddDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddEmptyNode - {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddHostNode - {"cuGraphAddHostNode", {"hipGraphAddHostNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddKernelNode - {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddMemcpyNode - {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddMemsetNode - {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphChildGraphNodeGetGraph - {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphClone - {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphCreate - {"cuGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphDestroy - {"cuGraphDestroy", {"hipGraphDestroy", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphDestroyNode - {"cuGraphDestroyNode", {"hipGraphDestroyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecDestroy - {"cuGraphExecDestroy", {"hipGraphExecDestroy", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetEdges - {"cuGraphGetEdges", {"hipGraphGetEdges", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetNodes - {"cuGraphGetNodes", {"hipGraphGetNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetRootNodes - {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // 
cudaGraphHostNodeGetParams - {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphHostNodeSetParams - {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphInstantiate - {"cuGraphInstantiate", {"hipGraphInstantiate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecKernelNodeSetParams - {"cuGraphExecKernelNodeSetParams", {"hipGraphExecKernelNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphKernelNodeGetParams - {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphKernelNodeSetParams - {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphLaunch - {"cuGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemcpyNodeGetParams - {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemcpyNodeSetParams - {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemsetNodeGetParams - {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemsetNodeSetParams - {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeFindInClone - {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetDependencies - {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetDependentNodes - {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetType - 
{"cuGraphNodeGetType", {"hipGraphNodeGetType", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphRemoveDependencies - {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecMemcpyNodeSetParams - {"cuGraphExecMemcpyNodeSetParams", {"hipGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecMemsetNodeSetParams - {"cuGraphExecMemsetNodeSetParams", {"hipGraphExecMemsetNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecHostNodeSetParams - {"cuGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecUpdate - {"cuGraphExecUpdate", {"hipGraphExecUpdate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.21. Occupancy - // cudaOccupancyMaxActiveBlocksPerMultiprocessor - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags","", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxPotentialBlockSize - {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxPotentialBlockSizeWithFlags - {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.22. 
Texture Reference Management [DEPRECATED] - // no analogues - {"cuTexRefGetAddress", {"hipTexRefGetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetAddress_v2", {"hipTexRefGetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetArray", {"hipTexRefGetArray", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFlags", {"hipTexRefGetFlags", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFormat", {"hipTexRefGetFormat", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress", {"hipTexRefSetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress_v2", {"hipTexRefSetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D_v2", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D_v3", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetArray", {"hipTexRefSetArray", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetBorderColor", 
{"hipTexRefSetBorderColor", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFlags", {"hipTexRefSetFlags", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFormat", {"hipTexRefSetFormat", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefCreate", {"hipTexRefCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefDestroy", {"hipTexRefDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.23. Surface Reference Management [DEPRECATED] - // no analogues - {"cuSurfRefGetArray", {"hipSurfRefGetArray", "", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfRefSetArray", {"hipSurfRefSetArray", "", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.24. 
Texture Object Management - // no analogue - // NOTE: Not equal to cudaCreateTextureObject due to different signatures - {"cuTexObjectCreate", {"hipTexObjectCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroyTextureObject - {"cuTexObjectDestroy", {"hipTexObjectDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectResourceDesc due to different signatures - {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGetTextureObjectResourceViewDesc - {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures - {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.25. Surface Object Management - // no analogue - // NOTE: Not equal to cudaCreateSurfaceObject due to different signatures - {"cuSurfObjectCreate", {"hipSurfObjectCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroySurfaceObject - {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetSurfaceObjectResourceDesc due to different signatures - {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.26. 
Peer Context Memory Access - // no analogue - // NOTE: Not equal to cudaDeviceEnablePeerAccess due to different signatures - {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", "", CONV_PEER, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaDeviceDisablePeerAccess due to different signatures - {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", "", CONV_PEER, API_DRIVER}}, - // cudaDeviceCanAccessPeer - {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", "", CONV_PEER, API_DRIVER}}, - // cudaDeviceGetP2PAttribute - {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", "", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.27. Graphics Interoperability - // cudaGraphicsMapResources - {"cuGraphicsMapResources", {"hipGraphicsMapResources", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedMipmappedArray - {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedPointer - {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedPointer - {"cuGraphicsResourceGetMappedPointer_v2", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceSetMapFlags - {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceSetMapFlags - {"cuGraphicsResourceSetMapFlags_v2", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsSubResourceGetMappedArray - {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsUnmapResources - {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - 
// cudaGraphicsUnregisterResource - {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.28. Profiler Control - // cudaProfilerInitialize - {"cuProfilerInitialize", {"hipProfilerInitialize", "", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaProfilerStart - {"cuProfilerStart", {"hipProfilerStart", "", CONV_PROFILER, API_DRIVER}}, - // cudaProfilerStop - {"cuProfilerStop", {"hipProfilerStop", "", CONV_PROFILER, API_DRIVER}}, - - // 5.29. OpenGL Interoperability - // cudaGLGetDevices - {"cuGLGetDevices", {"hipGLGetDevices", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsGLRegisterBuffer - {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsGLRegisterImage - {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaWGLGetDevice - {"cuWGLGetDevice", {"hipWGLGetDevice", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.29. 
OpenGL Interoperability [DEPRECATED] - // no analogue - {"cuGLCtxCreate", {"hipGLCtxCreate", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuGLInit", {"hipGLInit", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGLMapBufferObject due to different signatures - {"cuGLMapBufferObject", {"hipGLMapBufferObject_", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGLMapBufferObjectAsync due to different signatures - {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync_", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLRegisterBufferObject - {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLSetBufferObjectMapFlags - {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnmapBufferObject - {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnmapBufferObjectAsync - {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnregisterBufferObject - {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.30.Direct3D 9 Interoperability - // no analogue - {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDevice - {"cuD3D9GetDevice", {"hipD3D9GetDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDevices - {"cuD3D9GetDevices", {"hipD3D9GetDevices", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDirect3DDevice - {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // 
cudaGraphicsD3D9RegisterResource - {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.30.Direct3D 9 Interoperability [DEPRECATED] - // cudaD3D9MapResources - {"cuD3D9MapResources", {"hipD3D9MapResources", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9RegisterResource - {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedArray - {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPitch - {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPointer - {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedSize - {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetSurfaceDimensions - {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceSetMapFlags - {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9UnmapResources - {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9UnregisterResource - {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.31. 
Direct3D 10 Interoperability - // cudaD3D10GetDevice - {"cuD3D10GetDevice", {"hipD3D10GetDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10GetDevices - {"cuD3D10GetDevices", {"hipD3D10GetDevices", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsD3D10RegisterResource - {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.31. Direct3D 10 Interoperability [DEPRECATED] - // no analogue - {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10GetDirect3DDevice - {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10MapResources - {"cuD3D10MapResources", {"hipD3D10MapResources", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10RegisterResource - {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedArray - {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPitch - {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPointer - {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedSize - {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetSurfaceDimensions - {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceSetMapFlags - {"cuD310ResourceSetMapFlags", 
{"hipD3D10ResourceSetMapFlags", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10UnmapResources - {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10UnregisterResource - {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.32. Direct3D 11 Interoperability - // cudaD3D11GetDevice - {"cuD3D11GetDevice", {"hipD3D11GetDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D11GetDevices - {"cuD3D11GetDevices", {"hipD3D11GetDevices", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsD3D11RegisterResource - {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.32. Direct3D 11 Interoperability [DEPRECATED] - // no analogue - {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D11GetDirect3DDevice - {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.33. VDPAU Interoperability - // cudaGraphicsVDPAURegisterOutputSurface - {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsVDPAURegisterVideoSurface - {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaVDPAUGetDevice - {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.34. 
EGL Interoperability - // cudaEGLStreamConsumerAcquireFrame - {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerConnect - {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerConnectWithFlags - {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerDisconnect - {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerReleaseFrame - {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerConnect - {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerDisconnect - {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerPresentFrame - {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerReturnFrame - {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsEGLRegisterImage - {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedEglFrame - {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEventCreateFromEGLSync - {"cuEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp deleted file 
mode 100644 index 054de19800..0000000000 --- a/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ /dev/null @@ -1,1617 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA DRIVER API types to the corresponding HIP types -const std::map CUDA_DRIVER_TYPE_NAME_MAP{ - - // 1. 
Structs - - {"CUDA_ARRAY3D_DESCRIPTOR_st", {"HIP_ARRAY3D_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_ARRAY3D_DESCRIPTOR", {"HIP_ARRAY3D_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - - {"CUDA_ARRAY_DESCRIPTOR_st", {"HIP_ARRAY_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_ARRAY_DESCRIPTOR", {"HIP_ARRAY_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - - // cudaExternalMemoryBufferDesc - {"CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_BUFFER_DESC", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalMemoryHandleDesc - {"CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_HANDLE_DESC", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalMemoryMipmappedArrayDesc - {"CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreHandleDesc - {"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreSignalParams - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreWaitParams - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaHostNodeParams - {"CUDA_HOST_NODE_PARAMS_st", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_HOST_NODE_PARAMS", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaKernelNodeParams - {"CUDA_KERNEL_NODE_PARAMS_st", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_KERNEL_NODE_PARAMS", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaLaunchParams struct differs - {"CUDA_LAUNCH_PARAMS_st", {"hipLaunchParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_LAUNCH_PARAMS", {"hipLaunchParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_MEMCPY2D_st", {"hip_Memcpy2D", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_MEMCPY2D", {"hip_Memcpy2D", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUDA_MEMCPY3D_st", {"hip_Memcpy3D", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMCPY3D", {"hip_Memcpy3D", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_MEMCPY3D_PEER_st", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMCPY3D_PEER", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMemsetParams - {"CUDA_MEMSET_NODE_PARAMS_st", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMSET_NODE_PARAMS", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaResourceDesc struct differs - {"CUDA_RESOURCE_DESC_st", {"HIP_RESOURCE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - 
{"CUDA_RESOURCE_DESC", {"HIP_RESOURCE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaResourceViewDesc - // NOTE: cudaResourceViewDesc hasn't reserved bytes in the end - {"CUDA_RESOURCE_VIEW_DESC_st", {"HIP_RESOURCE_VIEW_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_RESOURCE_VIEW_DESC", {"HIP_RESOURCE_VIEW_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaTextureDesc differs - {"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaDeviceProp differs - {"CUdevprop_st", {"hipDeviceProp_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevprop", {"hipDeviceProp_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaIpcEventHandle_st - {"CUipcEventHandle_st", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_DRIVER}}, - // cudaIpcEventHandle_t - {"CUipcEventHandle", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaIpcMemHandle_st - {"CUipcMemHandle_st", {"hipIpcMemHandle_st", "", CONV_TYPE, API_DRIVER}}, - // cudaIpcMemHandle_t - {"CUipcMemHandle", {"hipIpcMemHandle_t", "", CONV_TYPE, API_DRIVER}}, - - // CUDA: "The types CUarray and cudaArray * represent the same data type and may be used interchangeably by casting the two types between each other." 
- // cudaArray - {"CUarray_st", {"hipArray", "", CONV_TYPE, API_DRIVER}}, - // cudaArray_t - {"CUarray", {"hipArray *", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUctx_st", {"ihipCtx_t", "", CONV_TYPE, API_DRIVER}}, - {"CUcontext", {"hipCtx_t", "", CONV_TYPE, API_DRIVER}}, - - // CUeglStreamConnection_st - {"CUeglStreamConnection_st", {"hipEglStreamConnection", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEglStreamConnection - {"CUeglStreamConnection", {"hipEglStreamConnection *", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // the same - CUevent_st - {"CUevent_st", {"ihipEvent_t", "", CONV_TYPE, API_DRIVER}}, - // cudaEvent_t - {"CUevent", {"hipEvent_t", "", CONV_TYPE, API_DRIVER}}, - - // CUexternalMemory_st - {"CUextMemory_st", {"hipExtMemory_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemory_t - {"CUexternalMemory", {"hipExternalMemory", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // CUexternalSemaphore_st - {"CUextSemaphore_st", {"hipExtSemaphore_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalSemaphore_t - {"CUexternalSemaphore", {"hipExternalSemaphore", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUfunc_st", {"ihipModuleSymbol_t", "", CONV_TYPE, API_DRIVER}}, - {"CUfunction", {"hipFunction_t", "", CONV_TYPE, API_DRIVER}}, - - // the same - CUgraph_st - {"CUgraph_st", {"hipGraph_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraph_t - {"CUgraph", {"hipGraph", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // the same -CUgraphExec_st - {"CUgraphExec_st", {"hipGraphExec_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExec_t - {"CUgraphExec", {"hipGraphExec", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaGraphicsResource - {"CUgraphicsResource_st", {"hipGraphicsResource_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResource_t - {"CUgraphicsResource", {"hipGraphicsResource_t", "", CONV_TYPE, 
API_DRIVER, HIP_UNSUPPORTED}}, - - // the same - CUgraphNode_st - {"CUgraphNode_st", {"hipGraphNode_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNode_t - {"CUgraphNode", {"hipGraphNode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMipmappedArray - {"CUmipmappedArray_st", {"hipMipmappedArray_st", "", CONV_TYPE, API_DRIVER}}, - // cudaMipmappedArray_t - {"CUmipmappedArray", {"hipMipmappedArray_t", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUmod_st", {"ihipModule_t", "", CONV_TYPE, API_DRIVER}}, - {"CUmodule", {"hipModule_t", "", CONV_TYPE, API_DRIVER}}, - - // the same - CUstream_st - {"CUstream_st", {"ihipStream_t", "", CONV_TYPE, API_DRIVER}}, - // cudaStream_t - {"CUstream", {"hipStream_t", "", CONV_TYPE, API_DRIVER}}, - - // NOTE: possibly surfaceReference is analogue - {"CUsurfref_st", {"ihipSurfaceReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUsurfref", {"hipSurfaceReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // textureReference - {"CUtexref_st", {"textureReference", "", CONV_TYPE, API_DRIVER}}, - {"CUtexref", {"hipTextureReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // CUuuid_st - // NOTE: the same struct and its name - {"CUuuid_st", {"hipUUID", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUuuid", {"hipUUID", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemLocation_st", {"hipMemoryLocation", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemLocation", {"hipMemoryLocation", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemAllocationProp_st", {"hipMemoryAllocationProperties", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAllocationProp", {"hipMemoryAllocationProperties", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemAccessDesc_st", {"hipMemoryAccessDescription", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAccessDesc", {"hipMemoryAccessDescription", "", 
CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 2. Unions - - {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 3. Enums - {"CUaddress_mode", {"hipTextureAddressMode", "", CONV_TYPE, API_DRIVER}}, - {"CUaddress_mode_enum", {"hipTextureAddressMode", "", CONV_TYPE, API_DRIVER}}, - // CUaddress_mode enum values - {"CU_TR_ADDRESS_MODE_WRAP", {"hipAddressModeWrap", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - {"CU_TR_ADDRESS_MODE_CLAMP", {"hipAddressModeClamp", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - {"CU_TR_ADDRESS_MODE_MIRROR", {"hipAddressModeMirror", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - {"CU_TR_ADDRESS_MODE_BORDER", {"hipAddressModeBorder", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - - {"CUarray_cubemap_face", {"hipGraphicsCubeFace", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUarray_cubemap_face_enum", {"hipGraphicsCubeFace", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUarray_cubemap_face enum values - // cudaGraphicsCubeFacePositiveX - {"CU_CUBEMAP_FACE_POSITIVE_X", {"hipGraphicsCubeFacePositiveX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsCubeFaceNegativeX - {"CU_CUBEMAP_FACE_NEGATIVE_X", {"hipGraphicsCubeFaceNegativeX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGraphicsCubeFacePositiveY - {"CU_CUBEMAP_FACE_POSITIVE_Y", {"hipGraphicsCubeFacePositiveY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGraphicsCubeFaceNegativeY - {"CU_CUBEMAP_FACE_NEGATIVE_Y", {"hipGraphicsCubeFaceNegativeY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaGraphicsCubeFacePositiveZ - {"CU_CUBEMAP_FACE_POSITIVE_Z", {"hipGraphicsCubeFacePositiveZ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaGraphicsCubeFaceNegativeZ - 
{"CU_CUBEMAP_FACE_NEGATIVE_Z", {"hipGraphicsCubeFaceNegativeZ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - - {"CUarray_format", {"hipArray_format", "", CONV_TYPE, API_DRIVER}}, - {"CUarray_format_enum", {"hipArray_format", "", CONV_TYPE, API_DRIVER}}, - // CUarray_format enum values - {"CU_AD_FORMAT_UNSIGNED_INT8", {"HIP_AD_FORMAT_UNSIGNED_INT8", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - {"CU_AD_FORMAT_UNSIGNED_INT16", {"HIP_AD_FORMAT_UNSIGNED_INT16", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - {"CU_AD_FORMAT_UNSIGNED_INT32", {"HIP_AD_FORMAT_UNSIGNED_INT32", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - {"CU_AD_FORMAT_SIGNED_INT8", {"HIP_AD_FORMAT_SIGNED_INT8", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - {"CU_AD_FORMAT_SIGNED_INT16", {"HIP_AD_FORMAT_SIGNED_INT16", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x09 - {"CU_AD_FORMAT_SIGNED_INT32", {"HIP_AD_FORMAT_SIGNED_INT32", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0a - {"CU_AD_FORMAT_HALF", {"HIP_AD_FORMAT_HALF", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - {"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x20 - - // cudaComputeMode - {"CUcomputemode", {"hipComputeMode", "", CONV_TYPE, API_DRIVER}}, - {"CUcomputemode_enum", {"hipComputeMode", "", CONV_TYPE, API_DRIVER}}, - // CUcomputemode enum values - // cudaComputeModeDefault - {"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaComputeModeExclusive - // NOTE: Deprecated since CUDA 10.0 - {"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // cudaComputeModeProhibited - {"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaComputeModeExclusiveProcess - {"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - - {"CUctx_flags", {"hipCctx_flags", 
"", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUctx_flags_enum", {"hipCctx_flags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUctx_flags enum values - // cudaDeviceScheduleAuto - {"CU_CTX_SCHED_AUTO", {"hipDeviceScheduleAuto", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaDeviceScheduleSpin - {"CU_CTX_SCHED_SPIN", {"hipDeviceScheduleSpin", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaDeviceScheduleYield - {"CU_CTX_SCHED_YIELD", {"hipDeviceScheduleYield", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaDeviceScheduleBlockingSync - {"CU_CTX_SCHED_BLOCKING_SYNC", {"hipDeviceScheduleBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaDeviceBlockingSync - // NOTE: Deprecated since CUDA 4.0 and replaced with CU_CTX_SCHED_BLOCKING_SYNC - {"CU_CTX_BLOCKING_SYNC", {"hipDeviceScheduleBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaDeviceScheduleMask - {"CU_CTX_SCHED_MASK", {"hipDeviceScheduleMask", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x07 - // cudaDeviceMapHost - {"CU_CTX_MAP_HOST", {"hipDeviceMapHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - // cudaDeviceLmemResizeToMax - {"CU_CTX_LMEM_RESIZE_TO_MAX", {"hipDeviceLmemResizeToMax", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - // cudaDeviceMask - {"CU_CTX_FLAGS_MASK", {"hipDeviceMask", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1f - - // cudaDeviceAttr - {"CUdevice_attribute", {"hipDeviceAttribute_t", "", CONV_TYPE, API_DRIVER}}, - {"CUdevice_attribute_enum", {"hipDeviceAttribute_t", "", CONV_TYPE, API_DRIVER}}, - // CUdevice_attribute enum values - // cudaDevAttrMaxThreadsPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK", {"hipDeviceAttributeMaxThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // cudaDevAttrMaxBlockDimX - {"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X", {"hipDeviceAttributeMaxBlockDimX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaDevAttrMaxBlockDimY - 
{"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y", {"hipDeviceAttributeMaxBlockDimY", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // cudaDevAttrMaxBlockDimZ - {"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z", {"hipDeviceAttributeMaxBlockDimZ", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 4 - // cudaDevAttrMaxGridDimX - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X", {"hipDeviceAttributeMaxGridDimX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // cudaDevAttrMaxGridDimY - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y", {"hipDeviceAttributeMaxGridDimY", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // cudaDevAttrMaxGridDimZ - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z", {"hipDeviceAttributeMaxGridDimZ", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaDevAttrMaxSharedMemoryPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // no analogue - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK - {"CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaDevAttrTotalConstantMemory - {"CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", {"hipDeviceAttributeTotalConstantMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 9 - // cudaDevAttrWarpSize - {"CU_DEVICE_ATTRIBUTE_WARP_SIZE", {"hipDeviceAttributeWarpSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 10 - // cudaDevAttrMaxPitch - {"CU_DEVICE_ATTRIBUTE_MAX_PITCH", {"hipDeviceAttributeMaxPitch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 11 - // cudaDevAttrMaxRegistersPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 12 - // no analogue - {"CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 12 - // cudaDevAttrClockRate - {"CU_DEVICE_ATTRIBUTE_CLOCK_RATE", 
{"hipDeviceAttributeClockRate", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 13 - // cudaDevAttrTextureAlignment - {"CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT", {"hipDeviceAttributeTextureAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 14 - // cudaDevAttrGpuOverlap - // NOTE: Deprecated, use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT - {"CU_DEVICE_ATTRIBUTE_GPU_OVERLAP", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 15 - // cudaDevAttrMultiProcessorCount - {"CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT", {"hipDeviceAttributeMultiprocessorCount", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 16 - // cudaDevAttrKernelExecTimeout - {"CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT", {"hipDeviceAttributeKernelExecTimeout", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 17 - // cudaDevAttrIntegrated - {"CU_DEVICE_ATTRIBUTE_INTEGRATED", {"hipDeviceAttributeIntegrated", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 18 - // cudaDevAttrCanMapHostMemory - {"CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY", {"hipDeviceAttributeCanMapHostMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 19 - // cudaDevAttrComputeMode - {"CU_DEVICE_ATTRIBUTE_COMPUTE_MODE", {"hipDeviceAttributeComputeMode", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 20 - // cudaDevAttrMaxTexture1DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH", {"hipDeviceAttributeMaxTexture1DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 21 - // cudaDevAttrMaxTexture2DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH", {"hipDeviceAttributeMaxTexture2DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 22 - // cudaDevAttrMaxTexture2DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT", {"hipDeviceAttributeMaxTexture2DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 23 - // cudaDevAttrMaxTexture3DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH", {"hipDeviceAttributeMaxTexture3DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 24 - // cudaDevAttrMaxTexture3DHeight - 
{"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT", {"hipDeviceAttributeMaxTexture3DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 25 - // cudaDevAttrMaxTexture3DDepth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH", {"hipDeviceAttributeMaxTexture3DDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 26 - // cudaDevAttrMaxTexture2DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 27 - // cudaDevAttrMaxTexture2DLayeredHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 28 - // cudaDevAttrMaxTexture2DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 29 - // cudaDevAttrMaxTexture2DLayeredWidth - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 27 - // cudaDevAttrMaxTexture2DLayeredHeight - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 28 - // cudaDevAttrMaxTexture2DLayeredLayers - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 29 - // cudaDevAttrSurfaceAlignment - {"CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT", {"hipDeviceAttributeSurfaceAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 30 - // 
cudaDevAttrConcurrentKernels - {"CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS", {"hipDeviceAttributeConcurrentKernels", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 31 - // cudaDevAttrEccEnabled - {"CU_DEVICE_ATTRIBUTE_ECC_ENABLED", {"hipDeviceAttributeEccEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 32 - // cudaDevAttrPciBusId - {"CU_DEVICE_ATTRIBUTE_PCI_BUS_ID", {"hipDeviceAttributePciBusId", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 33 - // cudaDevAttrPciDeviceId - {"CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID", {"hipDeviceAttributePciDeviceId", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 34 - // cudaDevAttrTccDriver - {"CU_DEVICE_ATTRIBUTE_TCC_DRIVER", {"hipDeviceAttributeTccDriver", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 35 - // cudaDevAttrMemoryClockRate - {"CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE", {"hipDeviceAttributeMemoryClockRate", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 36 - // cudaDevAttrGlobalMemoryBusWidth - {"CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH", {"hipDeviceAttributeMemoryBusWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 37 - // cudaDevAttrL2CacheSize - {"CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE", {"hipDeviceAttributeL2CacheSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 38 - // cudaDevAttrMaxThreadsPerMultiProcessor - {"CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxThreadsPerMultiProcessor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 39 - // cudaDevAttrAsyncEngineCount - {"CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 40 - // cudaDevAttrUnifiedAddressing - {"CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", {"hipDeviceAttributeUnifiedAddressing", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 41 - // cudaDevAttrMaxTexture1DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH", {"hipDeviceAttributeMaxTexture1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 42 - // 
cudaDevAttrMaxTexture1DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS", {"hipDeviceAttributeMaxTexture1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 43 - // no analogue - // NOTE: Deprecated, do not use - {"CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER", {"hipDeviceAttributeCanTex2DGather", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 44 - // cudaDevAttrMaxTexture2DGatherWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH", {"hipDeviceAttributeMaxTexture2DGatherWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 45 - // cudaDevAttrMaxTexture2DGatherHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT", {"hipDeviceAttributeMaxTexture2DGatherHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 46 - // cudaDevAttrMaxTexture3DWidthAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE", {"hipDeviceAttributeMaxTexture3DWidthAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 47 - // cudaDevAttrMaxTexture3DHeightAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE", {"hipDeviceAttributeMaxTexture3DHeightAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 48 - // cudaDevAttrMaxTexture3DDepthAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE", {"hipDeviceAttributeMaxTexture3DDepthAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 49 - // cudaDevAttrPciDomainId - {"CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID", {"hipDeviceAttributePciDomainId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 50 - // cudaDevAttrTexturePitchAlignment - {"CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", {"hipDeviceAttributeTexturePitchAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 51 - // cudaDevAttrMaxTextureCubemapWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH", {"hipDeviceAttributeMaxTextureCubemapWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 52 - // cudaDevAttrMaxTextureCubemapLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH", {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 53 - // cudaDevAttrMaxTextureCubemapLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS", {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 54 - // cudaDevAttrMaxSurface1DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH", {"hipDeviceAttributeMaxSurface1DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 55 - // cudaDevAttrMaxSurface2DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH", {"hipDeviceAttributeMaxSurface2DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 56 - // cudaDevAttrMaxSurface2DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT", {"hipDeviceAttributeMaxSurface2DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 57 - // cudaDevAttrMaxSurface3DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH", {"hipDeviceAttributeMaxSurface3DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 58 - // cudaDevAttrMaxSurface3DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT", {"hipDeviceAttributeMaxSurface3DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 59 - // cudaDevAttrMaxSurface3DDepth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH", {"hipDeviceAttributeMaxSurface3DDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 60 - // cudaDevAttrMaxSurface1DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurface1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 61 - // cudaDevAttrMaxSurface1DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurface1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 62 - // cudaDevAttrMaxSurface2DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurface2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 63 - // cudaDevAttrMaxSurface2DLayeredHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT", {"hipDeviceAttributeMaxSurface2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 64 - // cudaDevAttrMaxSurface2DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurface2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 65 - // cudaDevAttrMaxSurfaceCubemapWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH", {"hipDeviceAttributeMaxSurfaceCubemapWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 66 - // cudaDevAttrMaxSurfaceCubemapLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 67 - // cudaDevAttrMaxSurfaceCubemapLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 68 - // cudaDevAttrMaxTexture1DLinearWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH", {"hipDeviceAttributeMaxTexture1DLinearWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 69 - // cudaDevAttrMaxTexture2DLinearWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH", {"hipDeviceAttributeMaxTexture2DLinearWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - // cudaDevAttrMaxTexture2DLinearHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT", {"hipDeviceAttributeMaxTexture2DLinearHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 71 - // cudaDevAttrMaxTexture2DLinearPitch - 
{"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH", {"hipDeviceAttributeMaxTexture2DLinearPitch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 72 - // cudaDevAttrMaxTexture2DMipmappedWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH", {"hipDeviceAttributeMaxTexture2DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 73 - // cudaDevAttrMaxTexture2DMipmappedHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT", {"hipDeviceAttributeMaxTexture2DMipmappedHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 74 - // cudaDevAttrComputeCapabilityMajor - {"CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR", {"hipDeviceAttributeComputeCapabilityMajor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 75 - // cudaDevAttrComputeCapabilityMinor - {"CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR", {"hipDeviceAttributeComputeCapabilityMinor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 76 - // cudaDevAttrMaxTexture1DMipmappedWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH", {"hipDeviceAttributeMaxTexture1DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 77 - // cudaDevAttrStreamPrioritiesSupported - {"CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED", {"hipDeviceAttributeStreamPrioritiesSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 78 - // cudaDevAttrGlobalL1CacheSupported - {"CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED", {"hipDeviceAttributeGlobalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 79 - // cudaDevAttrLocalL1CacheSupported - {"CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED", {"hipDeviceAttributeLocalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 80 - // cudaDevAttrMaxSharedMemoryPerMultiprocessor - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 81 - // 
cudaDevAttrMaxRegistersPerMultiprocessor - {"CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxRegistersPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 82 - // cudaDevAttrManagedMemory - {"CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY", {"hipDeviceAttributeManagedMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 83 - // cudaDevAttrIsMultiGpuBoard - {"CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD", {"hipDeviceAttributeIsMultiGpuBoard", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 84 - // cudaDevAttrMultiGpuBoardGroupID - {"CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID", {"hipDeviceAttributeMultiGpuBoardGroupId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 85 - // cudaDevAttrHostNativeAtomicSupported - {"CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED", {"hipDeviceAttributeHostNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 86 - // cudaDevAttrSingleToDoublePrecisionPerfRatio - {"CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO", {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 87 - // cudaDevAttrPageableMemoryAccess - {"CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS", {"hipDeviceAttributePageableMemoryAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 88 - // cudaDevAttrConcurrentManagedAccess - {"CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS", {"hipDeviceAttributeConcurrentManagedAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 89 - // cudaDevAttrComputePreemptionSupported - {"CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED", {"hipDeviceAttributeComputePreemptionSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 90 - // cudaDevAttrCanUseHostPointerForRegisteredMem - {"CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM", {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 91 - // no analogue: cudaDevAttrReserved92 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS", {"hipDeviceAttributeCanUseStreamMemOps", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 92 - // no analogue: cudaDevAttrReserved93 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS", {"hipDeviceAttributeCanUse64BitStreamMemOps", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 93 - // no analogue: cudaDevAttrReserved94 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 94 - // cudaDevAttrCooperativeLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 95 - // cudaDevAttrCooperativeMultiDeviceLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 96 - // cudaDevAttrMaxSharedMemoryPerBlockOptin - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 97 - // cudaDevAttrCanFlushRemoteWrites - {"CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES", {"hipDeviceAttributeCanFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 98 - // cudaDevAttrHostRegisterSupported - {"CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED", {"hipDeviceAttributeHostRegisterSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 99 - // cudaDevAttrPageableMemoryAccessUsesHostPageTables - {"CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES", {"hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 100 - // cudaDevAttrDirectManagedMemAccessFromHost - {"CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST", 
{"hipDeviceAttributeDirectManagedMemAccessFromHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 101 - // no analogue - {"CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualAddressManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 102 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED", {"hipDeviceAttributeHandleTypePosixFileDescriptorSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 103 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED", {"hipDeviceAttributeHandleTypeWin32HandleSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 104 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED", {"hipDeviceAttributeHandleTypeWin32KmtHandleSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 105 - // no analogue - {"CU_DEVICE_ATTRIBUTE_MAX", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 106 - - // cudaDeviceP2PAttr - {"CUdevice_P2PAttribute", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevice_P2PAttribute_enum", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUdevice_P2PAttribute enum values - // cudaDevP2PAttrPerformanceRank = 1 - {"CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK", {"hipDeviceP2PAttributePerformanceRank", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaDevP2PAttrAccessSupported = 2 - {"CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", {"hipDeviceP2PAttributeAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaDevP2PAttrNativeAtomicSupported = 3 - {"CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED", {"hipDeviceP2PAttributeNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaDevP2PAttrCudaArrayAccessSupported = 4 - // NOTE" deprecated, 
use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead - {"CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // NOTE" deprecated, use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead - {"CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaDevP2PAttrCudaArrayAccessSupported = 4 - {"CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - - // cudaEGL.h - presented only on Linux in nvidia-cuda-dev package - // cudaEglColorFormat - {"CUeglColorFormat", {"hipEglColorFormat", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglColorFormate_enum", {"hipEglColorFormat", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglColorFormat enum values - // cudaEglColorFormatYUV420Planar = 0 - {"CU_EGL_COLOR_FORMAT_YUV420_PLANAR", {"hipEglColorFormatYUV420Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaEglColorFormatYUV420SemiPlanar = 1 - {"CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR", {"hipEglColorFormatYUV420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaEglColorFormatYUV422Planar = 2 - {"CU_EGL_COLOR_FORMAT_YUV422_PLANAR", {"hipEglColorFormatYUV422Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaEglColorFormatYUV422SemiPlanar = 3 - {"CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR", {"hipEglColorFormatYUV422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaEglColorFormatRGB = 4 - {"CU_EGL_COLOR_FORMAT_RGB", {"hipEglColorFormatRGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaEglColorFormatBGR = 5 - {"CU_EGL_COLOR_FORMAT_BGR", {"hipEglColorFormatBGR", 
"", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - // cudaEglColorFormatARGB = 6 - {"CU_EGL_COLOR_FORMAT_ARGB", {"hipEglColorFormatARGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x06 - // cudaEglColorFormatRGBA = 7 - {"CU_EGL_COLOR_FORMAT_RGBA", {"hipEglColorFormatRGBA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x07 - // cudaEglColorFormatL = 8 - {"CU_EGL_COLOR_FORMAT_L", {"hipEglColorFormatL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x08 - // cudaEglColorFormatR = 9 - {"CU_EGL_COLOR_FORMAT_R", {"hipEglColorFormatR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x09 - // cudaEglColorFormatYUV444Planar = 10 - {"CU_EGL_COLOR_FORMAT_YUV444_PLANAR", {"hipEglColorFormatYUV444Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0A - // cudaEglColorFormatYUV444SemiPlanar = 11 - {"CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR", {"hipEglColorFormatYUV444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0B - // cudaEglColorFormatYUYV422 = 12 - {"CU_EGL_COLOR_FORMAT_YUYV_422", {"hipEglColorFormatYUYV422", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0C - // cudaEglColorFormatUYVY422 = 13 - {"CU_EGL_COLOR_FORMAT_UYVY_422", {"hipEglColorFormatUYVY422", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0D - // cudaEglColorFormatABGR = 14 - {"CU_EGL_COLOR_FORMAT_ABGR", {"hipEglColorFormatABGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0E - // cudaEglColorFormatBGRA = 15 - {"CU_EGL_COLOR_FORMAT_BGRA", {"hipEglColorFormatBGRA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0F - // cudaEglColorFormatA = 16 - {"CU_EGL_COLOR_FORMAT_A", {"hipEglColorFormatA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // cudaEglColorFormatRG = 17 - {"CU_EGL_COLOR_FORMAT_RG", {"hipEglColorFormatRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x11 - // cudaEglColorFormatAYUV = 
18 - {"CU_EGL_COLOR_FORMAT_AYUV", {"hipEglColorFormatAYUV", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x12 - // cudaEglColorFormatYVU444SemiPlanar = 19 - {"CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR", {"hipEglColorFormatYVU444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x13 - // cudaEglColorFormatYVU422SemiPlanar = 20 - {"CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR", {"hipEglColorFormatYVU422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x14 - // cudaEglColorFormatYVU420SemiPlanar = 21 - {"CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR", {"hipEglColorFormatYVU420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x15 - // cudaEglColorFormatYVU420SemiPlanar = 22 - {"CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR", {"hipEglColorFormatY10V10U10_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x16 - // cudaEglColorFormatY10V10U10_420SemiPlanar = 23 - {"CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR", {"hipEglColorFormatY10V10U10_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x17 - // cudaEglColorFormatY12V12U12_444SemiPlanar = 24 - {"CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR", {"hipEglColorFormatY12V12U12_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x18 - // cudaEglColorFormatY12V12U12_420SemiPlanar = 25 - {"CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR", {"hipEglColorFormatY12V12U12_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x19 - // cudaEglColorFormatVYUY_ER = 26 - {"CU_EGL_COLOR_FORMAT_VYUY_ER", {"hipEglColorFormatVYUY_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1A - // cudaEglColorFormatUYVY_ER = 27 - {"CU_EGL_COLOR_FORMAT_UYVY_ER", {"hipEglColorFormatUYVY_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1B - // cudaEglColorFormatYUYV_ER = 28 - {"CU_EGL_COLOR_FORMAT_YUYV_ER", {"hipEglColorFormatYUYV_ER", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, HIP_UNSUPPORTED}}, // 0x1C - // cudaEglColorFormatYVYU_ER = 29 - {"CU_EGL_COLOR_FORMAT_YVYU_ER", {"hipEglColorFormatYVYU_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1D - // cudaEglColorFormatYUV_ER = 30 - {"CU_EGL_COLOR_FORMAT_YUV_ER", {"hipEglColorFormatYUV_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1E - // cudaEglColorFormatYUVA_ER = 31 - {"CU_EGL_COLOR_FORMAT_YUVA_ER", {"hipEglColorFormatYUVA_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1F - // cudaEglColorFormatAYUV_ER = 32 - {"CU_EGL_COLOR_FORMAT_AYUV_ER", {"hipEglColorFormatAYUV_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x20 - // cudaEglColorFormatYUV444Planar_ER = 33 - {"CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER", {"hipEglColorFormatYUV444Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x21 - // cudaEglColorFormatYUV422Planar_ER = 34 - {"CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER", {"hipEglColorFormatYUV422Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x22 - // cudaEglColorFormatYUV420Planar_ER = 35 - {"CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER", {"hipEglColorFormatYUV420Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x23 - // cudaEglColorFormatYUV444SemiPlanar_ER = 36 - {"CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER", {"hipEglColorFormatYUV444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x24 - // cudaEglColorFormatYUV422SemiPlanar_ER = 37 - {"CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER", {"hipEglColorFormatYUV422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x25 - // cudaEglColorFormatYUV420SemiPlanar_ER = 38 - {"CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER", {"hipEglColorFormatYUV420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x26 - // cudaEglColorFormatYVU444Planar_ER = 39 - {"CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER", {"hipEglColorFormatYVU444Planar_ER", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x27 - // cudaEglColorFormatYVU422Planar_ER = 40 - {"CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER", {"hipEglColorFormatYVU422Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x28 - // cudaEglColorFormatYVU420Planar_ER = 41 - {"CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER", {"hipEglColorFormatYVU420Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x29 - // cudaEglColorFormatYVU444SemiPlanar_ER = 42 - {"CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER", {"hipEglColorFormatYVU444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2A - // cudaEglColorFormatYVU422SemiPlanar_ER = 43 - {"CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER", {"hipEglColorFormatYVU422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2B - // cudaEglColorFormatYVU420SemiPlanar_ER = 44 - {"CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER", {"hipEglColorFormatYVU420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2C - // cudaEglColorFormatBayerRGGB = 45 - {"CU_EGL_COLOR_FORMAT_BAYER_RGGB", {"hipEglColorFormatBayerRGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2D - // cudaEglColorFormatBayerBGGR = 46 - {"CU_EGL_COLOR_FORMAT_BAYER_BGGR", {"hipEglColorFormatBayerBGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2E - // cudaEglColorFormatBayerGRBG = 47 - {"CU_EGL_COLOR_FORMAT_BAYER_GRBG", {"hipEglColorFormatBayerGRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2F - // cudaEglColorFormatBayerGBRG = 48 - {"CU_EGL_COLOR_FORMAT_BAYER_GBRG", {"hipEglColorFormatBayerGBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x30 - // cudaEglColorFormatBayer10RGGB = 49 - {"CU_EGL_COLOR_FORMAT_BAYER10_RGGB", {"hipEglColorFormatBayer10RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x31 - // cudaEglColorFormatBayer10BGGR = 50 - {"CU_EGL_COLOR_FORMAT_BAYER10_BGGR", 
{"hipEglColorFormatBayer10BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x32 - // cudaEglColorFormatBayer10GRBG = 51 - {"CU_EGL_COLOR_FORMAT_BAYER10_GRBG", {"hipEglColorFormatBayer10GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x33 - // cudaEglColorFormatBayer10GBRG = 52 - {"CU_EGL_COLOR_FORMAT_BAYER10_GBRG", {"hipEglColorFormatBayer10GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x34 - // cudaEglColorFormatBayer12RGGB = 53 - {"CU_EGL_COLOR_FORMAT_BAYER12_RGGB", {"hipEglColorFormatBayer12RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x35 - // cudaEglColorFormatBayer12BGGR = 54 - {"CU_EGL_COLOR_FORMAT_BAYER12_BGGR", {"hipEglColorFormatBayer12BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x36 - // cudaEglColorFormatBayer12GRBG = 55 - {"CU_EGL_COLOR_FORMAT_BAYER12_GRBG", {"hipEglColorFormatBayer12GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x37 - // cudaEglColorFormatBayer12GBRG = 56 - {"CU_EGL_COLOR_FORMAT_BAYER12_GBRG", {"hipEglColorFormatBayer12GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x38 - // cudaEglColorFormatBayer14RGGB = 57 - {"CU_EGL_COLOR_FORMAT_BAYER14_RGGB", {"hipEglColorFormatBayer14RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x39 - // cudaEglColorFormatBayer14BGGR = 58 - {"CU_EGL_COLOR_FORMAT_BAYER14_BGGR", {"hipEglColorFormatBayer14BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3A - // cudaEglColorFormatBayer14GRBG = 59 - {"CU_EGL_COLOR_FORMAT_BAYER14_GRBG", {"hipEglColorFormatBayer14GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3B - // cudaEglColorFormatBayer14GBRG = 60 - {"CU_EGL_COLOR_FORMAT_BAYER14_GBRG", {"hipEglColorFormatBayer14GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3C - // cudaEglColorFormatBayer20RGGB = 61 - {"CU_EGL_COLOR_FORMAT_BAYER20_RGGB", {"hipEglColorFormatBayer20RGGB", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3D - // cudaEglColorFormatBayer20BGGR = 62 - {"CU_EGL_COLOR_FORMAT_BAYER20_BGGR", {"hipEglColorFormatBayer20BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3E - // cudaEglColorFormatBayer20GRBG = 63 - {"CU_EGL_COLOR_FORMAT_BAYER20_GRBG", {"hipEglColorFormatBayer20GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3F - // cudaEglColorFormatBayer20GBRG = 64 - {"CU_EGL_COLOR_FORMAT_BAYER20_GBRG", {"hipEglColorFormatBayer20GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x40 - // cudaEglColorFormatYVU444Planar = 65 - {"CU_EGL_COLOR_FORMAT_YVU444_PLANAR", {"hipEglColorFormatYVU444Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x41 - // cudaEglColorFormatYVU422Planar = 66 - {"CU_EGL_COLOR_FORMAT_YVU422_PLANAR", {"hipEglColorFormatYVU422Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x42 - // cudaEglColorFormatYVU420Planar = 67 - {"CU_EGL_COLOR_FORMAT_YVU420_PLANAR", {"hipEglColorFormatYVU420Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x43 - // cudaEglColorFormatBayerIspRGGB = 68 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB", {"hipEglColorFormatBayerIspRGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x44 - // cudaEglColorFormatBayerIspBGGR = 69 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR", {"hipEglColorFormatBayerIspBGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x45 - // cudaEglColorFormatBayerIspGRBG = 70 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG", {"hipEglColorFormatBayerIspGRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x46 - // cudaEglColorFormatBayerIspGBRG = 71 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG", {"hipEglColorFormatBayerIspGBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x47 - // no analogue - {"CU_EGL_COLOR_FORMAT_MAX", {"hipEglColorFormatMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x48 - - // 
cudaEglFrameType - {"CUeglFrameType", {"hipEglFrameType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglFrameType_enum", {"hipEglFrameType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglFrameType enum values - // cudaEglFrameTypeArray - {"CU_EGL_FRAME_TYPE_ARRAY", {"hipEglFrameTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaEglFrameTypePitch - {"CU_EGL_FRAME_TYPE_PITCH", {"hipEglFrameTypePitch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - - // cudaEglResourceLocationFlags - {"CUeglResourceLocationFlags", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglResourceLocationFlags_enum", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglResourceLocationFlags enum values - // cudaEglResourceLocationSysmem - {"CU_EGL_RESOURCE_LOCATION_SYSMEM", {"hipEglResourceLocationSysmem", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaEglResourceLocationVidmem - {"CU_EGL_RESOURCE_LOCATION_VIDMEM", {"hipEglResourceLocationVidmem", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // no analogue - {"CUevent_flags", {"hipEventFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUevent_flags_enum", {"hipEventFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUevent_flags enum values - // cudaEventDefault - {"CU_EVENT_DEFAULT", {"hipEventDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaEventBlockingSync - {"CU_EVENT_BLOCKING_SYNC", {"hipEventBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaEventDisableTiming - {"CU_EVENT_DISABLE_TIMING", {"hipEventDisableTiming", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaEventInterprocess - {"CU_EVENT_INTERPROCESS", {"hipEventInterprocess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - - // cudaExternalMemoryHandleType - {"CUexternalMemoryHandleType", {"hipExternalMemoryHandleType", "", 
CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUexternalMemoryHandleType_enum", {"hipExternalMemoryHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUexternalMemoryHandleType enum values - // cudaExternalMemoryHandleTypeOpaqueFd - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD", {"hipExternalMemoryHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaExternalMemoryHandleTypeOpaqueWin32 - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32", {"hipExternalMemoryHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaExternalMemoryHandleTypeOpaqueWin32Kmt - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT", {"hipExternalMemoryHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaExternalMemoryHandleTypeD3D12Heap - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP", {"hipExternalMemoryHandleTypeD3D12Heap", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaExternalMemoryHandleTypeD3D12Resource - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE", {"hipExternalMemoryHandleTypeD3D12Resource", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaExternalMemoryHandleTypeD3D11Resource - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE", {"hipExternalMemoryHandleTypeD3D11Resource", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - // cudaExternalMemoryHandleTypeD3D11ResourceKmt - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT", {"hipExternalMemoryHandleTypeD3D11ResourceKmt", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - // cudaExternalMemoryHandleTypeNvSciBuf - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF", {"hipExternalMemoryHandleTypeNvSciBuf", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - - // cudaExternalSemaphoreHandleType - {"CUexternalSemaphoreHandleType", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - 
{"CUexternalSemaphoreHandleType_enum", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUexternalSemaphoreHandleType enum values - // cudaExternalSemaphoreHandleTypeOpaqueFd - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD", {"hipExternalSemaphoreHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaExternalSemaphoreHandleTypeOpaqueWin32 - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32", {"hipExternalSemaphoreHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT", {"hipExternalSemaphoreHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaExternalSemaphoreHandleTypeD3D12Fence - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE", {"hipExternalSemaphoreHandleTypeD3D12Fence", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - - // cudaExternalSemaphoreHandleTypeD3D11Fence - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE", {"hipExternalSemaphoreHandleTypeD3D11Fence", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaExternalSemaphoreHandleTypeNvSciSync - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC", {"hipExternalSemaphoreHandleTypeNvSciSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - // cudaExternalSemaphoreHandleTypeKeyedMutex - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX", {"hipExternalSemaphoreHandleTypeKeyedMutex", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - // cudaExternalSemaphoreHandleTypeKeyedMutexKmt - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT", {"hipExternalSemaphoreHandleTypeKeyedMutexKmt", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - - // cudaTextureFilterMode - {"CUfilter_mode", {"hipTextureFilterMode", "", CONV_TYPE, API_DRIVER}}, - {"CUfilter_mode_enum", {"hipTextureFilterMode", 
"", CONV_TYPE, API_DRIVER}}, - // CUfilter_mode enum values - // cudaFilterModePoint - {"CU_TR_FILTER_MODE_POINT", {"hipFilterModePoint", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaFilterModeLinear - {"CU_TR_FILTER_MODE_LINEAR", {"hipFilterModeLinear", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - - // cudaFuncCache - {"CUfunc_cache", {"hipFuncCache_t", "", CONV_TYPE, API_DRIVER}}, - {"CUfunc_cache_enum", {"hipFuncCache_t", "", CONV_TYPE, API_DRIVER}}, - // CUfunc_cache enum values - // cudaFilterModePoint = 0 - {"CU_FUNC_CACHE_PREFER_NONE", {"hipFuncCachePreferNone", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaFuncCachePreferShared = 1 - {"CU_FUNC_CACHE_PREFER_SHARED", {"hipFuncCachePreferShared", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaFuncCachePreferL1 = 2 - {"CU_FUNC_CACHE_PREFER_L1", {"hipFuncCachePreferL1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaFuncCachePreferEqual = 3 - {"CU_FUNC_CACHE_PREFER_EQUAL", {"hipFuncCachePreferEqual", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - - // cudaFuncAttribute - {"CUfunction_attribute", {"hipFunction_attribute", "", CONV_TYPE, API_DRIVER}}, - {"CUfunction_attribute_enum", {"hipFunction_attribute", "", CONV_TYPE, API_DRIVER}}, - // CUfunction_attribute enum values - // no analogue - {"CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", {"HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // no analogue - {"CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // no analogue - {"CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // no analogue - {"CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // no analogue - {"CU_FUNC_ATTRIBUTE_NUM_REGS", {"HIP_FUNC_ATTRIBUTE_NUM_REGS", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 
4 - // no analogue - {"CU_FUNC_ATTRIBUTE_PTX_VERSION", {"HIP_FUNC_ATTRIBUTE_PTX_VERSION", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // no analogue - {"CU_FUNC_ATTRIBUTE_BINARY_VERSION", {"HIP_FUNC_ATTRIBUTE_BINARY_VERSION", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // no analogue - {"CU_FUNC_ATTRIBUTE_CACHE_MODE_CA", {"HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaFuncAttributeMaxDynamicSharedMemorySize - {"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaFuncAttributePreferredSharedMemoryCarveout - {"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 9 - // cudaFuncAttributeMax - {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 10 - - //cudaGraphicsMapFlags - {"CUgraphicsMapResourceFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphicsMapResourceFlags_enum", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUgraphicsMapResourceFlags enum values - // cudaGraphicsMapFlagsNone = 0 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE", {"hipGraphicsMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsMapFlagsReadOnly = 1 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY", {"hipGraphicsMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGraphicsMapFlagsWriteDiscard = 2 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD", {"hipGraphicsMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaGraphicsRegisterFlags - {"CUgraphicsRegisterFlags", {"hipGraphicsRegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphicsRegisterFlags_enum", {"hipGraphicsRegisterFlags", "", CONV_TYPE, 
API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsRegisterFlags enum values - //cudaGraphicsRegisterFlagsNone = 0 - {"CU_GRAPHICS_REGISTER_FLAGS_NONE", {"hipGraphicsRegisterFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsRegisterFlagsReadOnly = 1 - {"CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY", {"hipGraphicsRegisterFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - //cudaGraphicsRegisterFlagsWriteDiscard = 2 - {"CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD", {"hipGraphicsRegisterFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGraphicsRegisterFlagsSurfaceLoadStore = 4 - {"CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST", {"hipGraphicsRegisterFlagsSurfaceLoadStore", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaGraphicsRegisterFlagsTextureGather = 8 - {"CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER", {"hipGraphicsRegisterFlagsTextureGather", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x08 - - // cudaGraphNodeType - {"CUgraphNodeType", {"hipGraphNodeType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphNodeType_enum", {"hipGraphNodeType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeType enum values - // cudaGraphNodeTypeKernel = 0x00 - {"CU_GRAPH_NODE_TYPE_KERNEL", {"hipGraphNodeTypeKernel", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaGraphNodeTypeMemcpy = 0x01 - {"CU_GRAPH_NODE_TYPE_MEMCPY", {"hipGraphNodeTypeMemcpy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaGraphNodeTypeMemset = 0x02 - {"CU_GRAPH_NODE_TYPE_MEMSET", {"hipGraphNodeTypeMemset", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaGraphNodeTypeHost = 0x03 - {"CU_GRAPH_NODE_TYPE_HOST", {"hipGraphNodeTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaGraphNodeTypeGraph = 0x04 - {"CU_GRAPH_NODE_TYPE_GRAPH", 
{"hipGraphNodeTypeGraph", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaGraphNodeTypeEmpty = 0x05 - {"CU_GRAPH_NODE_TYPE_EMPTY", {"hipGraphNodeTypeEmpty", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaGraphNodeTypeCount - {"CU_GRAPH_NODE_TYPE_COUNT", {"hipGraphNodeTypeCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - - // cudaGraphExecUpdateResult - {"CUgraphExecUpdateResult", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphExecUpdateResult_enum", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUgraphExecUpdateResult enum values - // cudaGraphExecUpdateSuccess - {"CU_GRAPH_EXEC_UPDATE_SUCCESS", {"hipGraphExecUpdateSuccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - // cudaGraphExecUpdateError - {"CU_GRAPH_EXEC_UPDATE_ERROR", {"hipGraphExecUpdateError", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // cudaGraphExecUpdateErrorTopologyChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", {"hipGraphExecUpdateErrorTopologyChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - // cudaGraphExecUpdateErrorNodeTypeChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED", {"hipGraphExecUpdateErrorNodeTypeChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3 - // cudaGraphExecUpdateErrorFunctionChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED", {"hipGraphExecUpdateErrorFunctionChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - // cudaGraphExecUpdateErrorParametersChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED", {"hipGraphExecUpdateErrorParametersChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x5 - // cudaGraphExecUpdateErrorNotSupported - {"CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED", {"hipGraphExecUpdateErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 
0x6 - - // no analogue - {"CUipcMem_flags", {"hipIpcMemFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUipcMem_flags_enum", {"hipIpcMemFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUipcMem_flags enum values - // cudaIpcMemLazyEnablePeerAccess - {"CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", {"hipIpcMemLazyEnablePeerAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - - // no analogue - {"CUjit_cacheMode", {"hipJitCacheMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_cacheMode_enum", {"hipJitCacheMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_cacheMode enum values - // no analogue - {"CU_JIT_CACHE_OPTION_NONE", {"hipJitCacheModeOptionNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // no analogue - {"CU_JIT_CACHE_OPTION_CG", {"hipJitCacheModeOptionCG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"CU_JIT_CACHE_OPTION_CA", {"hipJitCacheModeOptionCA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUjit_fallback", {"hipJitFallback", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_fallback_enum", {"hipJitFallback", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_fallback enum values - {"CU_PREFER_PTX", {"hipJitFallbackPreferPtx", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - {"CU_PREFER_BINARY", {"hipJitFallbackPreferBinary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: is not used by HIP, as it has no JIT, thus just a dummy enum - {"CUjit_option", {"hipJitOption", "", CONV_TYPE, API_DRIVER}}, - {"CUjit_option_enum", {"hipJitOption", "", CONV_TYPE, API_DRIVER}}, - // CUjit_option enum values - {"CU_JIT_MAX_REGISTERS", {"hipJitOptionMaxRegisters", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - {"CU_JIT_THREADS_PER_BLOCK", {"hipJitOptionThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_WALL_TIME", {"hipJitOptionWallTime", "", 
CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_INFO_LOG_BUFFER", {"hipJitOptionInfoLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionInfoLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_ERROR_LOG_BUFFER", {"hipJitOptionErrorLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionErrorLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_OPTIMIZATION_LEVEL", {"hipJitOptionOptimizationLevel", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_TARGET_FROM_CUCONTEXT", {"hipJitOptionTargetFromContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_TARGET", {"hipJitOptionTarget", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_FALLBACK_STRATEGY", {"hipJitOptionFallbackStrategy", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GENERATE_DEBUG_INFO", {"hipJitOptionGenerateDebugInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_LOG_VERBOSE", {"hipJitOptionLogVerbose", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GENERATE_LINE_INFO", {"hipJitOptionGenerateLineInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_CACHE_MODE", {"hipJitOptionCacheMode", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_NEW_SM3X_OPT", {"hipJitOptionSm3xOpt", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_FAST_COMPILE", {"hipJitOptionFastCompile", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_NAMES", {"hipJitGlobalSymbolNames", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_ADDRESSES", {"hipJitGlobalSymbolAddresses", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_COUNT", {"hipJitGlobalSymbolCount", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_NUM_OPTIONS", {"hipJitOptionNumOptions", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - - // no analogue - {"CUjit_target", {"hipJitTarget", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_target_enum", 
{"hipJitTarget", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_target enum values - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_10", {"hipJitTargetCompute10", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 10 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_11", {"hipJitTargetCompute11", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 11 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_12", {"hipJitTargetCompute12", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 12 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_13", {"hipJitTargetCompute13", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 13 - {"CU_TARGET_COMPUTE_20", {"hipJitTargetCompute20", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 20 - {"CU_TARGET_COMPUTE_21", {"hipJitTargetCompute21", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 21 - {"CU_TARGET_COMPUTE_30", {"hipJitTargetCompute30", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 20 - {"CU_TARGET_COMPUTE_32", {"hipJitTargetCompute32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 32 - {"CU_TARGET_COMPUTE_35", {"hipJitTargetCompute35", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 35 - {"CU_TARGET_COMPUTE_37", {"hipJitTargetCompute37", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 37 - {"CU_TARGET_COMPUTE_50", {"hipJitTargetCompute50", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 50 - {"CU_TARGET_COMPUTE_52", {"hipJitTargetCompute52", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 52 - {"CU_TARGET_COMPUTE_53", {"hipJitTargetCompute53", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 53 - {"CU_TARGET_COMPUTE_60", {"hipJitTargetCompute60", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 60 - {"CU_TARGET_COMPUTE_61", {"hipJitTargetCompute61", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 61 - {"CU_TARGET_COMPUTE_62", {"hipJitTargetCompute62", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, HIP_UNSUPPORTED}}, // 62 - {"CU_TARGET_COMPUTE_70", {"hipJitTargetCompute70", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - {"CU_TARGET_COMPUTE_72", {"hipJitTargetCompute72", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_73", {"hipJitTargetCompute73", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 73 - {"CU_TARGET_COMPUTE_75", {"hipJitTargetCompute75", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 75 - - // no analogue - {"CUjitInputType", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjitInputType_enum", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjitInputType enum values - {"CU_JIT_INPUT_CUBIN", {"hipJitInputTypeBin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - {"CU_JIT_INPUT_PTX", {"hipJitInputTypePtx", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_FATBINARY", {"hipJitInputTypeFatBinary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_OBJECT", {"hipJitInputTypeObject", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_LIBRARY", {"hipJitInputTypeLibrary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_NUM_INPUT_TYPES", {"hipJitInputTypeNumInputTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaLimit - {"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER}}, - {"CUlimit_enum", {"hipLimit_t", "", CONV_TYPE, API_DRIVER}}, - // CUlimit enum values - // cudaLimitStackSize - {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - //cudaLimitPrintfFifoSize - {"CU_LIMIT_PRINTF_FIFO_SIZE", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - //cudaLimitMallocHeapSize - {"CU_LIMIT_MALLOC_HEAP_SIZE", {"hipLimitMallocHeapSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, 
// 0x02 - // cudaLimitDevRuntimeSyncDepth - {"CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH", {"hipLimitDevRuntimeSyncDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaLimitDevRuntimePendingLaunchCount - {"CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT", {"hipLimitDevRuntimePendingLaunchCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaLimitMaxL2FetchGranularity - {"CU_LIMIT_MAX_L2_FETCH_GRANULARITY", {"hipLimitMaxL2FetchGranularity", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - // no analogue - {"CU_LIMIT_MAX", {"hipLimitMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMemoryAdvise - {"CUmem_advise", {"hipMemAdvise", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmem_advise_enum", {"hipMemAdvise", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmem_advise enum values - // cudaMemAdviseSetReadMostly - {"CU_MEM_ADVISE_SET_READ_MOSTLY", {"hipMemAdviseSetReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaMemAdviseUnsetReadMostly - {"CU_MEM_ADVISE_UNSET_READ_MOSTLY", {"hipMemAdviseUnsetReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaMemAdviseSetPreferredLocation - {"CU_MEM_ADVISE_SET_PREFERRED_LOCATION", {"hipMemAdviseSetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaMemAdviseUnsetPreferredLocation - {"CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION", {"hipMemAdviseUnsetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaMemAdviseSetAccessedBy - {"CU_MEM_ADVISE_SET_ACCESSED_BY", {"hipMemAdviseSetAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaMemAdviseUnsetAccessedBy - {"CU_MEM_ADVISE_UNSET_ACCESSED_BY", {"hipMemAdviseUnsetAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - - // no analogue - {"CUmemAttach_flags", {"hipMemAttachFlags_t", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUmemAttach_flags_enum", {"hipMemAttachFlags_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAttach_flags enum values - // cudaMemAttachGlobal - {"CU_MEM_ATTACH_GLOBAL", {"hipMemAttachGlobal", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - // cudaMemAttachHost - {"CU_MEM_ATTACH_HOST", {"hipMemAttachHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x2 - // cudaMemAttachSingle - {"CU_MEM_ATTACH_SINGLE", {"hipMemAttachSingle", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - - // no analogue - // NOTE: cudaMemoryType is partial analogue - {"CUmemorytype", {"hipMemoryType", "", CONV_TYPE, API_DRIVER}}, - {"CUmemorytype_enum", {"hipMemoryType", "", CONV_TYPE, API_DRIVER}}, - // CUmemorytype enum values - {"CU_MEMORYTYPE_HOST", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - {"CU_MEMORYTYPE_DEVICE", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - {"CU_MEMORYTYPE_ARRAY", {"hipMemoryTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - {"CU_MEMORYTYPE_UNIFIED", {"hipMemoryTypeUnified", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - - // cudaMemRangeAttribute - {"CUmem_range_attribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmem_range_attribute_enum", {"hipMemRangeAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmem_range_attribute enum values - // cudaMemRangeAttributeReadMostly - {"CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY", {"hipMemRangeAttributeReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaMemRangeAttributePreferredLocation - {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION", {"hipMemRangeAttributePreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaMemRangeAttributeAccessedBy - {"CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // 
cudaMemRangeAttributeLastPrefetchLocation - {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - - //no analogue - {"CUoccupancy_flags", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUoccupancy_flags_enum", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUoccupancy_flags enum values - // cudaOccupancyDefault - {"CU_OCCUPANCY_DEFAULT", {"hipOccupancyDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaOccupancyDisableCachingOverride - {"CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE", {"hipOccupancyDisableCachingOverride", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - //no analogue - // TODO: Analogous enum is needed in HIP. Couldn't map enum to struct hipPointerAttribute_t. - // TODO: Do the same for Pointer Attributes as for Device Attributes. - {"CUpointer_attribute", {"hipPointerAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUpointer_attribute_enum", {"hipPointerAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUpointer_attribute enum values - {"CU_POINTER_ATTRIBUTE_CONTEXT", {"hipPointerAttributeContext", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - {"CU_POINTER_ATTRIBUTE_MEMORY_TYPE", {"hipPointerAttributeMemoryType", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - {"CU_POINTER_ATTRIBUTE_DEVICE_POINTER", {"hipPointerAttributeDevicePointer", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - {"CU_POINTER_ATTRIBUTE_HOST_POINTER", {"hipPointerAttributeHostPointer", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - {"CU_POINTER_ATTRIBUTE_P2P_TOKENS", {"hipPointerAttributeP2pTokens", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - {"CU_POINTER_ATTRIBUTE_SYNC_MEMOPS", {"hipPointerAttributeSyncMemops", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - 
{"CU_POINTER_ATTRIBUTE_BUFFER_ID", {"hipPointerAttributeBufferId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - {"CU_POINTER_ATTRIBUTE_IS_MANAGED", {"hipPointerAttributeIsManaged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - {"CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL", {"hipPointerAttributeDeviceOrdinal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 9 - {"CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE", {"hipPointerAttributeIsLegacyCudaIpcCapable", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 10 - {"CU_POINTER_ATTRIBUTE_RANGE_START_ADDR", {"hipPointerAttributeRangeStartAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 11 - {"CU_POINTER_ATTRIBUTE_RANGE_SIZE", {"hipPointerAttributeRangeSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 12 - {"CU_POINTER_ATTRIBUTE_MAPPED", {"hipPointerAttributeMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 13 - {"CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES", {"hipPointerAttributeAllowedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 14 - - // cudaResourceType - {"CUresourcetype", {"hipResourceType", "", CONV_TYPE, API_DRIVER}}, - {"CUresourcetype_enum", {"hipResourceType", "", CONV_TYPE, API_DRIVER}}, - // CUresourcetype enum values - // cudaResourceTypeArray - {"CU_RESOURCE_TYPE_ARRAY", {"hipResourceTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - //cudaResourceTypeMipmappedArray - {"CU_RESOURCE_TYPE_MIPMAPPED_ARRAY", {"hipResourceTypeMipmappedArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - //cudaResourceTypeLinear - {"CU_RESOURCE_TYPE_LINEAR", {"hipResourceTypeLinear", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - //cudaResourceTypePitch2D - {"CU_RESOURCE_TYPE_PITCH2D", {"hipResourceTypePitch2D", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - - // cudaResourceViewFormat - {"CUresourceViewFormat", {"hipResourceViewFormat", "", CONV_TYPE, API_DRIVER}}, - 
{"CUresourceViewFormat_enum", {"hipResourceViewFormat", "", CONV_TYPE, API_DRIVER}}, - // CUresourceViewFormat enum values - // cudaResViewFormatNone - {"CU_RES_VIEW_FORMAT_NONE", {"hipResViewFormatNone", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaResViewFormatUnsignedChar1 - {"CU_RES_VIEW_FORMAT_UINT_1X8", {"hipResViewFormatUnsignedChar1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaResViewFormatUnsignedChar2 - {"CU_RES_VIEW_FORMAT_UINT_2X8", {"hipResViewFormatUnsignedChar2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaResViewFormatUnsignedChar4 - {"CU_RES_VIEW_FORMAT_UINT_4X8", {"hipResViewFormatUnsignedChar4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - // cudaResViewFormatSignedChar1 - {"CU_RES_VIEW_FORMAT_SINT_1X8", {"hipResViewFormatSignedChar1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaResViewFormatSignedChar2 - {"CU_RES_VIEW_FORMAT_SINT_2X8", {"hipResViewFormatSignedChar2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x05 - // cudaResViewFormatSignedChar4 - {"CU_RES_VIEW_FORMAT_SINT_4X8", {"hipResViewFormatSignedChar4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x06 - // cudaResViewFormatUnsignedShort1 - {"CU_RES_VIEW_FORMAT_UINT_1X16", {"hipResViewFormatUnsignedShort1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x07 - // cudaResViewFormatUnsignedShort2 - {"CU_RES_VIEW_FORMAT_UINT_2X16", {"hipResViewFormatUnsignedShort2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - // cudaResViewFormatUnsignedShort4 - {"CU_RES_VIEW_FORMAT_UINT_4X16", {"hipResViewFormatUnsignedShort4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x09 - // cudaResViewFormatSignedShort1 - {"CU_RES_VIEW_FORMAT_SINT_1X16", {"hipResViewFormatSignedShort1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0a - // cudaResViewFormatSignedShort2 - {"CU_RES_VIEW_FORMAT_SINT_2X16", {"hipResViewFormatSignedShort2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0b - // cudaResViewFormatSignedShort4 - {"CU_RES_VIEW_FORMAT_SINT_4X16", 
{"hipResViewFormatSignedShort4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0c - // cudaResViewFormatUnsignedInt1 - {"CU_RES_VIEW_FORMAT_UINT_1X32", {"hipResViewFormatUnsignedInt1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0d - // cudaResViewFormatUnsignedInt2 - {"CU_RES_VIEW_FORMAT_UINT_2X32", {"hipResViewFormatUnsignedInt2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0e - // cudaResViewFormatUnsignedInt4 - {"CU_RES_VIEW_FORMAT_UINT_4X32", {"hipResViewFormatUnsignedInt4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0f - // cudaResViewFormatSignedInt1 - {"CU_RES_VIEW_FORMAT_SINT_1X32", {"hipResViewFormatSignedInt1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - // cudaResViewFormatSignedInt2 - {"CU_RES_VIEW_FORMAT_SINT_2X32", {"hipResViewFormatSignedInt2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x11 - // cudaResViewFormatSignedInt4 - {"CU_RES_VIEW_FORMAT_SINT_4X32", {"hipResViewFormatSignedInt4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x12 - // cudaResViewFormatHalf1 - {"CU_RES_VIEW_FORMAT_FLOAT_1X16", {"hipResViewFormatHalf1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x13 - // cudaResViewFormatHalf2 - {"CU_RES_VIEW_FORMAT_FLOAT_2X16", {"hipResViewFormatHalf2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x14 - // cudaResViewFormatHalf4 - {"CU_RES_VIEW_FORMAT_FLOAT_4X16", {"hipResViewFormatHalf4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x15 - // cudaResViewFormatFloat1 - {"CU_RES_VIEW_FORMAT_FLOAT_1X32", {"hipResViewFormatFloat1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x16 - // cudaResViewFormatFloat2 - {"CU_RES_VIEW_FORMAT_FLOAT_2X32", {"hipResViewFormatFloat2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x17 - // cudaResViewFormatFloat4 - {"CU_RES_VIEW_FORMAT_FLOAT_4X32", {"hipResViewFormatFloat4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x18 - // cudaResViewFormatUnsignedBlockCompressed1 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC1", {"hipResViewFormatUnsignedBlockCompressed1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x19 - // 
cudaResViewFormatUnsignedBlockCompressed2 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC2", {"hipResViewFormatUnsignedBlockCompressed2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1a - // cudaResViewFormatUnsignedBlockCompressed3 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC3", {"hipResViewFormatUnsignedBlockCompressed3", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1b - // cudaResViewFormatUnsignedBlockCompressed4 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC4", {"hipResViewFormatUnsignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1c - // cudaResViewFormatSignedBlockCompressed4 - {"CU_RES_VIEW_FORMAT_SIGNED_BC4", {"hipResViewFormatSignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1d - // cudaResViewFormatUnsignedBlockCompressed5 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC5", {"hipResViewFormatUnsignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1e - // cudaResViewFormatSignedBlockCompressed5 - {"CU_RES_VIEW_FORMAT_SIGNED_BC5", {"hipResViewFormatSignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1f - // cudaResViewFormatUnsignedBlockCompressed6H - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC6H", {"hipResViewFormatUnsignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x20 - // cudaResViewFormatSignedBlockCompressed6H - {"CU_RES_VIEW_FORMAT_SIGNED_BC6H", {"hipResViewFormatSignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x21 - // cudaResViewFormatUnsignedBlockCompressed7 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC7", {"hipResViewFormatUnsignedBlockCompressed7", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x22 - - // cudaError - {"CUresult", {"hipError_t", "", CONV_TYPE, API_DRIVER}}, - {"cudaError_enum", {"hipError_t", "", CONV_TYPE, API_DRIVER}}, - // CUresult enum values - // cudaSuccess - {"CUDA_SUCCESS", {"hipSuccess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaErrorInvalidValue - {"CUDA_ERROR_INVALID_VALUE", {"hipErrorInvalidValue", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // 
cudaErrorMemoryAllocation - {"CUDA_ERROR_OUT_OF_MEMORY", {"hipErrorOutOfMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaErrorInitializationError - {"CUDA_ERROR_NOT_INITIALIZED", {"hipErrorNotInitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // cudaErrorCudartUnloading - {"CUDA_ERROR_DEINITIALIZED", {"hipErrorDeinitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 4 - // cudaErrorProfilerDisabled - {"CUDA_ERROR_PROFILER_DISABLED", {"hipErrorProfilerDisabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // cudaErrorProfilerNotInitialized - // NOTE: Deprecated since CUDA 5.0 - {"CUDA_ERROR_PROFILER_NOT_INITIALIZED", {"hipErrorProfilerNotInitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // NOTE: Deprecated since CUDA 5.0 - // cudaErrorProfilerAlreadyStarted - {"CUDA_ERROR_PROFILER_ALREADY_STARTED", {"hipErrorProfilerAlreadyStarted", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaErrorProfilerAlreadyStopped - // NOTE: Deprecated since CUDA 5.0 - {"CUDA_ERROR_PROFILER_ALREADY_STOPPED", {"hipErrorProfilerAlreadyStopped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaErrorNoDevice - {"CUDA_ERROR_NO_DEVICE", {"hipErrorNoDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 100 - // cudaErrorInvalidDevice - {"CUDA_ERROR_INVALID_DEVICE", {"hipErrorInvalidDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 101 - // cudaErrorInvalidKernelImage - {"CUDA_ERROR_INVALID_IMAGE", {"hipErrorInvalidImage", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 200 - // cudaErrorDeviceUninitilialized - {"CUDA_ERROR_INVALID_CONTEXT", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 201 - // no analogue - // NOTE: Deprecated since CUDA 3.2 - {"CUDA_ERROR_CONTEXT_ALREADY_CURRENT", {"hipErrorContextAlreadyCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 202 - // cudaErrorMapBufferObjectFailed - {"CUDA_ERROR_MAP_FAILED", {"hipErrorMapFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 205 - // 
cudaErrorUnmapBufferObjectFailed - {"CUDA_ERROR_UNMAP_FAILED", {"hipErrorUnmapFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 206 - // cudaErrorArrayIsMapped - {"CUDA_ERROR_ARRAY_IS_MAPPED", {"hipErrorArrayIsMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 207 - // cudaErrorAlreadyMapped - {"CUDA_ERROR_ALREADY_MAPPED", {"hipErrorAlreadyMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 208 - // cudaErrorNoKernelImageForDevice - {"CUDA_ERROR_NO_BINARY_FOR_GPU", {"hipErrorNoBinaryForGpu", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 209 - // cudaErrorAlreadyAcquired - {"CUDA_ERROR_ALREADY_ACQUIRED", {"hipErrorAlreadyAcquired", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 210 - // cudaErrorNotMapped - {"CUDA_ERROR_NOT_MAPPED", {"hipErrorNotMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 211 - // cudaErrorNotMappedAsArray - {"CUDA_ERROR_NOT_MAPPED_AS_ARRAY", {"hipErrorNotMappedAsArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 212 - // cudaErrorNotMappedAsPointer - {"CUDA_ERROR_NOT_MAPPED_AS_POINTER", {"hipErrorNotMappedAsPointer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 213 - // cudaErrorECCUncorrectable - {"CUDA_ERROR_ECC_UNCORRECTABLE", {"hipErrorECCNotCorrectable", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 214 - // cudaErrorUnsupportedLimit - {"CUDA_ERROR_UNSUPPORTED_LIMIT", {"hipErrorUnsupportedLimit", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 215 - // cudaErrorDeviceAlreadyInUse - {"CUDA_ERROR_CONTEXT_ALREADY_IN_USE", {"hipErrorContextAlreadyInUse", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 216 - // cudaErrorPeerAccessUnsupported - {"CUDA_ERROR_PEER_ACCESS_UNSUPPORTED", {"hipErrorPeerAccessUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 217 - // cudaErrorInvalidPtx - {"CUDA_ERROR_INVALID_PTX", {"hipErrorInvalidKernelFile", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 218 - // cudaErrorInvalidGraphicsContext - {"CUDA_ERROR_INVALID_GRAPHICS_CONTEXT", {"hipErrorInvalidGraphicsContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 219 - // 
cudaErrorNvlinkUncorrectable - {"CUDA_ERROR_NVLINK_UNCORRECTABLE", {"hipErrorNvlinkUncorrectable", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 220 - // cudaErrorJitCompilerNotFound - {"CUDA_ERROR_JIT_COMPILER_NOT_FOUND", {"hipErrorJitCompilerNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 221 - // cudaErrorInvalidSource - {"CUDA_ERROR_INVALID_SOURCE", {"hipErrorInvalidSource", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 300 - // cudaErrorFileNotFound - {"CUDA_ERROR_FILE_NOT_FOUND", {"hipErrorFileNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 301 - // cudaErrorSharedObjectSymbolNotFound - {"CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", {"hipErrorSharedObjectSymbolNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 302 - // cudaErrorSharedObjectInitFailed - {"CUDA_ERROR_SHARED_OBJECT_INIT_FAILED", {"hipErrorSharedObjectInitFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 303 - // cudaErrorOperatingSystem - {"CUDA_ERROR_OPERATING_SYSTEM", {"hipErrorOperatingSystem", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 304 - // cudaErrorInvalidResourceHandle - {"CUDA_ERROR_INVALID_HANDLE", {"hipErrorInvalidHandle", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 400 - // cudaErrorIllegalState - {"CUDA_ERROR_ILLEGAL_STATE", {"hipErrorIllegalState", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 401 - // cudaErrorSymbolNotFound - {"CUDA_ERROR_NOT_FOUND", {"hipErrorNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 500 - // cudaErrorNotReady - {"CUDA_ERROR_NOT_READY", {"hipErrorNotReady", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 600 - // cudaErrorIllegalAddress - {"CUDA_ERROR_ILLEGAL_ADDRESS", {"hipErrorIllegalAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 700 - // cudaErrorLaunchOutOfResources - {"CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", {"hipErrorLaunchOutOfResources", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 701 - // cudaErrorLaunchTimeout - {"CUDA_ERROR_LAUNCH_TIMEOUT", {"hipErrorLaunchTimeOut", "", 
CONV_NUMERIC_LITERAL, API_DRIVER}}, // 702 - // cudaErrorLaunchIncompatibleTexturing - {"CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", {"hipErrorLaunchIncompatibleTexturing", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 703 - // cudaErrorPeerAccessAlreadyEnabled - {"CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED", {"hipErrorPeerAccessAlreadyEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 704 - // cudaErrorPeerAccessNotEnabled - {"CUDA_ERROR_PEER_ACCESS_NOT_ENABLED", {"hipErrorPeerAccessNotEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 705 - // cudaErrorSetOnActiveProcess - {"CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE", {"hipErrorSetOnActiveProcess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 708 - // cudaErrorContextIsDestroyed - {"CUDA_ERROR_CONTEXT_IS_DESTROYED", {"hipErrorContextIsDestroyed", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 709 - // cudaErrorAssert - {"CUDA_ERROR_ASSERT", {"hipErrorAssert", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 710 - // cudaErrorTooManyPeers - {"CUDA_ERROR_TOO_MANY_PEERS", {"hipErrorTooManyPeers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 711 - // cudaErrorHostMemoryAlreadyRegistered - {"CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED", {"hipErrorHostMemoryAlreadyRegistered", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 712 - // cudaErrorHostMemoryNotRegistered - {"CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED", {"hipErrorHostMemoryNotRegistered", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 713 - // cudaErrorHardwareStackError - {"CUDA_ERROR_HARDWARE_STACK_ERROR", {"hipErrorHardwareStackError", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 714 - // cudaErrorIllegalInstruction - {"CUDA_ERROR_ILLEGAL_INSTRUCTION", {"hipErrorIllegalInstruction", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 715 - // cudaErrorMisalignedAddress - {"CUDA_ERROR_MISALIGNED_ADDRESS", {"hipErrorMisalignedAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 716 - // 
cudaErrorInvalidAddressSpace - {"CUDA_ERROR_INVALID_ADDRESS_SPACE", {"hipErrorInvalidAddressSpace", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 717 - // cudaErrorInvalidPc - {"CUDA_ERROR_INVALID_PC", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 718 - // cudaErrorLaunchFailure - {"CUDA_ERROR_LAUNCH_FAILED", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 719 - // cudaErrorCooperativeLaunchTooLarge - {"CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 720 - // cudaErrorNotPermitted - {"CUDA_ERROR_NOT_PERMITTED", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 800 - // cudaErrorNotSupported - {"CUDA_ERROR_NOT_SUPPORTED", {"hipErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 801 - // cudaErrorSystemNotReady - {"CUDA_ERROR_SYSTEM_NOT_READY", {"hipErrorSystemNotReady", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 802 - // cudaErrorSystemDriverMismatch - {"CUDA_ERROR_SYSTEM_DRIVER_MISMATCH", {"hipErrorSystemDriverMismatch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 803 - // cudaErrorCompatNotSupportedOnDevice - {"CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE", {"hipErrorCompatNotSupportedOnDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 804 - // cudaErrorStreamCaptureUnsupported - {"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 900 - // cudaErrorStreamCaptureInvalidated - {"CUDA_ERROR_STREAM_CAPTURE_INVALIDATED", {"hipErrorStreamCaptureInvalidated", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 901 - // cudaErrorStreamCaptureMerge - {"CUDA_ERROR_STREAM_CAPTURE_MERGE", {"hipErrorStreamCaptureMerge", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 902 - // cudaErrorStreamCaptureUnmatched - 
{"CUDA_ERROR_STREAM_CAPTURE_UNMATCHED", {"hipErrorStreamCaptureUnmatched", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 903 - // cudaErrorStreamCaptureUnjoined - {"CUDA_ERROR_STREAM_CAPTURE_UNJOINED", {"hipErrorStreamCaptureUnjoined", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 904 - // cudaErrorStreamCaptureIsolation - {"CUDA_ERROR_STREAM_CAPTURE_ISOLATION", {"hipErrorStreamCaptureIsolation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 905 - // cudaErrorStreamCaptureImplicit - {"CUDA_ERROR_STREAM_CAPTURE_IMPLICIT", {"hipErrorStreamCaptureImplicit", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 906 - // cudaErrorCapturedEvent - {"CUDA_ERROR_CAPTURED_EVENT", {"hipErrorCapturedEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 907 - // cudaErrorStreamCaptureWrongThread - {"CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD", {"hipErrorStreamCaptureWrongThread", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 908 - // cudaErrorTimeout - {"CUDA_ERROR_TIMEOUT", {"hipErrorTimeout", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 909 - // cudaErrorGraphExecUpdateFailure - {"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 910 - // cudaErrorUnknown - {"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 999 - - // cudaSharedMemConfig - {"CUsharedconfig", {"hipSharedMemConfig", "", CONV_TYPE, API_DRIVER}}, - {"CUsharedconfig_enum", {"hipSharedMemConfig", "", CONV_TYPE, API_DRIVER}}, - // CUsharedconfig enum values - // cudaSharedMemBankSizeDefault = 0 - {"CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE", {"hipSharedMemBankSizeDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaSharedMemBankSizeFourByte = 1 - {"CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE", {"hipSharedMemBankSizeFourByte", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // 
cudaSharedMemBankSizeEightByte = 2 - {"CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE", {"hipSharedMemBankSizeEightByte", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - - // cudaSharedCarveout - {"CUshared_carveout", {"hipSharedCarveout", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUshared_carveout_enum", {"hipSharedCarveout", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUshared_carveout enum values - // cudaSharedmemCarveoutDefault - {"CU_SHAREDMEM_CARVEOUT_DEFAULT", {"hipSharedmemCarveoutDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // -1 - // cudaSharedmemCarveoutMaxShared - {"CU_SHAREDMEM_CARVEOUT_MAX_SHARED", {"hipSharedmemCarveoutMaxShared", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 100 - // cudaSharedmemCarveoutMaxShared - {"CU_SHAREDMEM_CARVEOUT_MAX_L1", {"hipSharedmemCarveoutMaxL1", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - - // no analogue - {"CUstream_flags", {"hipStreamFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstream_flags_enum", {"hipStreamFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstream_flags enum values - // cudaStreamDefault = 0x00 - {"CU_STREAM_DEFAULT", {"hipStreamDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0 - // cudaStreamNonBlocking = 0x01 - {"CU_STREAM_NON_BLOCKING", {"hipStreamNonBlocking", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - - // no analogue - {"CUstreamBatchMemOpType", {"hipStreamBatchMemOpType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpType_enum", {"hipStreamBatchMemOpType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamBatchMemOpType enum values - {"CU_STREAM_MEM_OP_WAIT_VALUE_32", {"hipStreamBatchMemOpWaitValue32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - {"CU_STREAM_MEM_OP_WRITE_VALUE_32", {"hipStreamBatchMemOpWriteValue32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - {"CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES", 
{"hipStreamBatchMemOpFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - {"CU_STREAM_MEM_OP_WAIT_VALUE_64", {"hipStreamBatchMemOpWaitValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - {"CU_STREAM_MEM_OP_WRITE_VALUE_64", {"hipStreamBatchMemOpWriteValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - - // cudaStreamCaptureStatus - {"CUstreamCaptureStatus", {"hipStreamCaptureStatus", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamCaptureStatus_enum", {"hipStreamCaptureStatus", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamCaptureStatus enum values - // cudaStreamCaptureStatusNone - {"CU_STREAM_CAPTURE_STATUS_NONE", {"hipStreamCaptureStatusNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaStreamCaptureStatusActive - {"CU_STREAM_CAPTURE_STATUS_ACTIVE", {"hipStreamCaptureStatusActive", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaStreamCaptureStatusInvalidated - {"CU_STREAM_CAPTURE_STATUS_INVALIDATED", {"hipStreamCaptureStatusInvalidated", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - - // cudaStreamCaptureMode - {"CUstreamCaptureMode", {"hipStreamCaptureMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamCaptureMode_enum", {"hipStreamCaptureMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamCaptureMode enum values - // cudaStreamCaptureModeGlobal - {"CU_STREAM_CAPTURE_MODE_GLOBAL", {"hipStreamCaptureModeGlobal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaStreamCaptureModeThreadLocal - {"CU_STREAM_CAPTURE_MODE_THREAD_LOCAL", {"hipStreamCaptureModeThreadLocal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaStreamCaptureModeRelaxed - {"CU_STREAM_CAPTURE_MODE_RELAXED", {"hipStreamCaptureModeRelaxed", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"CUstreamWaitValue_flags", 
{"hipStreamWaitValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamWaitValue_flags_enum", {"hipStreamWaitValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamWaitValue_flags enum values - {"CU_STREAM_WAIT_VALUE_GEQ", {"hipStreamWaitValueGeq", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_STREAM_WAIT_VALUE_EQ", {"hipStreamWaitValueEq", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_STREAM_WAIT_VALUE_AND", {"hipStreamWaitValueAnd", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_STREAM_WAIT_VALUE_FLUSH", {"hipStreamWaitValueFlush", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1<<30 - - // no analogue - {"CUstreamWriteValue_flags", {"hipStreamWriteValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamWriteValue_flags_enum", {"hipStreamWriteValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamWriteValue_flags enum values - {"CU_STREAM_WRITE_VALUE_DEFAULT", {"hipStreamWriteValueDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER", {"hipStreamWriteValueNoMemoryBarrier", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - - // cudaGLDeviceList - {"CUGLDeviceList", {"hipGLDeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUGLDeviceList_enum", {"hipGLDeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUGLDeviceList enum values - // cudaGLDeviceListAll = 1 - {"CU_GL_DEVICE_LIST_ALL", {"hipGLDeviceListAll", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGLDeviceListCurrentFrame = 2 - {"CU_GL_DEVICE_LIST_CURRENT_FRAME", {"hipGLDeviceListCurrentFrame", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGLDeviceListNextFrame = 3 - {"CU_GL_DEVICE_LIST_NEXT_FRAME", {"hipGLDeviceListNextFrame", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // 
cudaGLMapFlags - {"CUGLmap_flags", {"hipGLMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUGLmap_flags_enum", {"hipGLMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUGLmap_flags enum values - // cudaGLMapFlagsNone = 0 - {"CU_GL_MAP_RESOURCE_FLAGS_NONE", {"hipGLMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGLMapFlagsReadOnly = 1 - {"CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY", {"hipGLMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGLMapFlagsWriteDiscard = 2 - {"CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD", {"hipGLMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D9DeviceList - {"CUd3d9DeviceList", {"hipD3D9DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9DeviceList_enum", {"hipD3D9DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9DeviceList enum values - // cudaD3D9DeviceListAll = 1 - {"CU_D3D9_DEVICE_LIST_ALL", {"HIP_D3D9_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D9DeviceListCurrentFrame = 2 - {"CU_D3D9_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D9DeviceListNextFrame = 3 - {"CU_D3D9_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // cudaD3D9MapFlags - // NOTE: Deprecated - {"CUd3d9map_flags", {"hipD3D9MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9map_flags_enum", {"hipD3D9MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9map_flags enum values - // cudaD3D9MapFlagsNone = 0 - {"CU_D3D9_MAPRESOURCE_FLAGS_NONE", {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D9MapFlagsReadOnly = 1 - {"CU_D3D9_MAPRESOURCE_FLAGS_READONLY", 
{"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D9MapFlagsWriteDiscard = 2 - {"CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D9RegisterFlags - {"CUd3d9register_flags", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9register_flags_enum", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9register_flags enum values - // cudaD3D9RegisterFlagsNone = 0 - {"CU_D3D9_REGISTER_FLAGS_NONE", {"HIP_D3D9_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D9RegisterFlagsArray = 1 - {"CU_D3D9_REGISTER_FLAGS_ARRAY", {"HIP_D3D9_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // cudaD3D10DeviceList - {"CUd3d10DeviceList", {"hipd3d10DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10DeviceList_enum", {"hipD3D10DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10DeviceList enum values - // cudaD3D10DeviceListAll = 1 - {"CU_D3D10_DEVICE_LIST_ALL", {"HIP_D3D10_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D10DeviceListCurrentFrame = 2 - {"CU_D3D10_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D10DeviceListNextFrame = 3 - {"CU_D3D10_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // cudaD3D10MapFlags - {"CUd3d10map_flags", {"hipD3D10MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10map_flags_enum", {"hipD3D10MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10map_flags enum values - // cudaD3D10MapFlagsNone = 0 - {"CU_D3D10_MAPRESOURCE_FLAGS_NONE", 
{"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D10MapFlagsReadOnly = 1 - {"CU_D3D10_MAPRESOURCE_FLAGS_READONLY", {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D10MapFlagsWriteDiscard = 2 - {"CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D10RegisterFlags - {"CUd3d10register_flags", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10register_flags_enum", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10register_flags enum values - // cudaD3D10RegisterFlagsNone = 0 - {"CU_D3D10_REGISTER_FLAGS_NONE", {"HIP_D3D10_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D10RegisterFlagsArray = 1 - {"CU_D3D10_REGISTER_FLAGS_ARRAY", {"HIP_D3D10_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // cudaD3D11DeviceList - {"CUd3d11DeviceList", {"hipd3d11DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d11DeviceList_enum", {"hipD3D11DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d11DeviceList enum values - // cudaD3D11DeviceListAll = 1 - {"CU_D3D11_DEVICE_LIST_ALL", {"HIP_D3D11_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D11DeviceListCurrentFrame = 2 - {"CU_D3D11_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D11DeviceListNextFrame = 3 - {"CU_D3D11_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // no analogue - {"CUmemAllocationHandleType", {"hipMemoryAllocationHandleType", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUmemAllocationHandleType_enum", {"hipMemoryAllocationHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAllocationHandleType enum values - {"CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR", {"HIP_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_HANDLE_TYPE_WIN32", {"HIP_MEM_HANDLE_TYPE_WIN32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_MEM_HANDLE_TYPE_WIN32_KMT", {"HIP_MEM_HANDLE_TYPE_WIN32_KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - {"CU_MEM_HANDLE_TYPE_MAX", {"HIP_MEM_HANDLE_TYPE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemAccess_flags", {"hipMemoryAccessFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAccess_flags_enum", {"hipMemoryAccessFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAccess_flags enum values - {"CU_MEM_ACCESS_FLAGS_PROT_NONE", {"HIP_MEM_ACCESS_FLAGS_PROT_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_ACCESS_FLAGS_PROT_READ", {"HIP_MEM_ACCESS_FLAGS_PROT_READ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_MEM_ACCESS_FLAGS_PROT_READWRITE", {"HIP_MEM_ACCESS_FLAGS_PROT_READWRITE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3 - {"CU_MEM_ACCESS_FLAGS_PROT_MAX", {"HIP_MEM_ACCESS_FLAGS_PROT_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemLocationType", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemLocationType_enum", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemLocationType enum values - {"CU_MEM_LOCATION_TYPE_INVALID", {"HIP_MEM_LOCATION_TYPE_INVALID", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_MEM_LOCATION_TYPE_DEVICE", {"HIP_MEM_LOCATION_TYPE_DEVICE", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_LOCATION_TYPE_MAX", {"HIP_MEM_LOCATION_TYPE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemAllocationGranularity_flags", {"hipMemoryAllocationGranularityFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAllocationGranularity_flags_enum", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAllocationGranularity_flags enum values - {"CU_MEM_ALLOC_GRANULARITY_MINIMUM", {"HIP_MEM_ALLOC_GRANULARITY_MINIMUM", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_MEM_ALLOC_GRANULARITY_RECOMMENDED", {"HIP_MEM_ALLOC_GRANULARITY_RECOMMENDED", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - - // 4. Typedefs - - // no analogue - {"CUdevice", {"hipDevice_t", "", CONV_TYPE, API_DRIVER}}, - {"CUdeviceptr", {"hipDeviceptr_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaHostFn_t - {"CUhostFn", {"hipHostFn", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUoccupancyB2DSize", {"hipOccupancyB2DSize", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaStreamCallback_t - {"CUstreamCallback", {"hipStreamCallback_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaSurfaceObject_t - {"CUsurfObject", {"hipSurfaceObject", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaTextureObject_t - {"CUtexObject", {"hipTextureObject_t", "", CONV_TYPE, API_DRIVER}}, - - // 5. 
Defines - - {"__CUDACC__", {"__HIPCC__", "", CONV_DEFINE, API_DRIVER}}, - {"CUDA_CB", {"HIP_CB", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaCpuDeviceId ((int)-1) - {"CU_DEVICE_CPU", {"hipCpuDeviceId", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUdevice)-1) - // cudaInvalidDeviceId ((int)-1) - {"CU_DEVICE_INVALID", {"hipInvalidDeviceId", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUdevice)-2) - // CUDA_IPC_HANDLE_SIZE - {"CU_IPC_HANDLE_SIZE", {"HIP_IPC_HANDLE_SIZE", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 64 - {"CU_LAUNCH_PARAM_BUFFER_POINTER", {"HIP_LAUNCH_PARAM_BUFFER_POINTER", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x01) - {"CU_LAUNCH_PARAM_BUFFER_SIZE", {"HIP_LAUNCH_PARAM_BUFFER_SIZE", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x02) - {"CU_LAUNCH_PARAM_END", {"HIP_LAUNCH_PARAM_END", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x00) - // cudaHostAllocPortable - {"CU_MEMHOSTALLOC_PORTABLE", {"hipHostMallocPortable", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaHostAllocMapped - {"CU_MEMHOSTALLOC_DEVICEMAP", {"hipHostMallocMapped", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaHostAllocWriteCombined - {"CU_MEMHOSTALLOC_WRITECOMBINED", {"hipHostMallocWriteCombined", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - // cudaHostRegisterPortable - {"CU_MEMHOSTREGISTER_PORTABLE", {"hipHostRegisterPortable", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaHostRegisterMapped - {"CU_MEMHOSTREGISTER_DEVICEMAP", {"hipHostRegisterMapped", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaHostRegisterIoMemory - {"CU_MEMHOSTREGISTER_IOMEMORY", {"hipHostRegisterIoMemory", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - {"CU_PARAM_TR_DEFAULT", {"HIP_PARAM_TR_DEFAULT", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // -1 - // cudaStreamLegacy ((cudaStream_t)0x1) - {"CU_STREAM_LEGACY", {"hipStreamLegacy", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUstream)0x1) - // cudaStreamPerThread ((cudaStream_t)0x2) - {"CU_STREAM_PER_THREAD", 
{"hipStreamPerThread", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUstream)0x2) - {"CU_TRSA_OVERRIDE_FORMAT", {"HIP_TRSA_OVERRIDE_FORMAT", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - {"CU_TRSF_NORMALIZED_COORDINATES", {"HIP_TRSF_NORMALIZED_COORDINATES", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - {"CU_TRSF_READ_AS_INTEGER", {"HIP_TRSF_READ_AS_INTEGER", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - {"CU_TRSF_SRGB", {"HIP_TRSF_SRGB", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // no analogue - // NOTE: Deprecated, use CUDA_ARRAY3D_LAYERED - {"CUDA_ARRAY3D_2DARRAY", {"HIP_ARRAY3D_2DARRAY", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaArrayLayered - {"CUDA_ARRAY3D_LAYERED", {"hipArrayLayered", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaArraySurfaceLoadStore - {"CUDA_ARRAY3D_SURFACE_LDST", {"hipArraySurfaceLoadStore", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaArrayCubemap - {"CUDA_ARRAY3D_CUBEMAP", {"hipArrayCubemap", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - // cudaArrayTextureGather - {"CUDA_ARRAY3D_TEXTURE_GATHER", {"hipArrayTextureGather", "", CONV_DEFINE, API_DRIVER}}, // 0x08 - // no analogue - {"CUDA_ARRAY3D_DEPTH_TEXTURE", {"hipArrayDepthTexture", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // cudaArrayColorAttachment - {"CUDA_ARRAY3D_COLOR_ATTACHMENT", {"hipArrayColorAttachment", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x20 - // cudaCooperativeLaunchMultiDeviceNoPreSync - {"CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC", {"hipCooperativeLaunchMultiDeviceNoPreSync", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaCooperativeLaunchMultiDeviceNoPostSync - {"CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC", {"hipCooperativeLaunchMultiDeviceNoPostSync", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaExternalMemoryDedicated - {"CUDA_EXTERNAL_MEMORY_DEDICATED", {"hipExternalMemoryDedicated", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // 
cudaExternalSemaphoreSignalSkipNvSciBufMemSync - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC", {"hipExternalSemaphoreSignalSkipNvSciBufMemSync", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaExternalSemaphoreWaitSkipNvSciBufMemSync - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC", {"hipExternalSemaphoreWaitSkipNvSciBufMemSync", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaNvSciSyncAttrSignal - {"CUDA_NVSCISYNC_ATTR_SIGNAL", {"hipNvSciSyncAttrSignal", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // cudaNvSciSyncAttrWait - {"CUDA_NVSCISYNC_ATTR_WAIT", {"hipNvSciSyncAttrWait", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CUDA_VERSION", {"HIP_VERSION", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 10000 -}; diff --git a/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp b/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp deleted file mode 100644 index 29e51f9b5c..0000000000 --- a/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_FFT_FUNCTION_MAP{ - {"cufftPlan1d", {"hipfftPlan1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlan2d", {"hipfftPlan2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlan3d", {"hipfftPlan3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlanMany", {"hipfftPlanMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan1d", {"hipfftMakePlan1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan2d", {"hipfftMakePlan2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan3d", {"hipfftMakePlan3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlanMany", {"hipfftMakePlanMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlanMany64", {"hipfftMakePlanMany64", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSizeMany64", {"hipfftGetSizeMany64", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate1d", {"hipfftEstimate1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate2d", {"hipfftEstimate2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate3d", {"hipfftEstimate3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimateMany", {"hipfftEstimateMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftCreate", {"hipfftCreate", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize1d", {"hipfftGetSize1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize2d", {"hipfftGetSize2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize3d", {"hipfftGetSize3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSizeMany", {"hipfftGetSizeMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize", {"hipfftGetSize", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetWorkArea", {"hipfftSetWorkArea", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetAutoAllocation", {"hipfftSetAutoAllocation", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecC2C", {"hipfftExecC2C", "", 
CONV_LIB_FUNC, API_FFT}}, - {"cufftExecR2C", {"hipfftExecR2C", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecC2R", {"hipfftExecC2R", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecZ2Z", {"hipfftExecZ2Z", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecD2Z", {"hipfftExecD2Z", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecZ2D", {"hipfftExecZ2D", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetStream", {"hipfftSetStream", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftDestroy", {"hipfftDestroy", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetVersion", {"hipfftGetVersion", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetProperty", {"hipfftGetProperty", "", CONV_LIB_FUNC, API_FFT, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp b/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp deleted file mode 100644 index 499afe7695..0000000000 --- a/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_FFT_TYPE_NAME_MAP{ - - // cuFFT defines - {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // -1 - {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING - - // cuFFT enums - {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT}}, - {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT}}, - {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x0 0 - {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x1 1 - {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2 2 - {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x3 3 - {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x4 4 - {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x5 5 - {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6 6 - {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x7 7 - {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x8 8 - {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x9 9 - {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xA 10 - {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, 
API_FFT}}, // 0xB 11 - {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xC 12 - {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xD 13 - {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xE 14 - {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED}}, - {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x10 16 - {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT}}, - {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT}}, - {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2a - {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2c - {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x29 - {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6a - {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6c - {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x69 - {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, HIP_UNSUPPORTED}}, - {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, HIP_UNSUPPORTED}}, - {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED}}, // 0x01 - - // cuFFT types - {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT}}, - {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT}}, - {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT}}, - {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT}}, - {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Perl.cpp b/hipify-clang/src/CUDA2HIP_Perl.cpp deleted file mode 100644 index d74ba70f21..0000000000 --- a/hipify-clang/src/CUDA2HIP_Perl.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/* -Copyright (c) 
2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Path.h" -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "Statistics.h" - -namespace perl { - - using namespace std; - using namespace llvm; - - const string sCopyright = - "##\n" - "# Copyright (c) 2015-present Advanced Micro Devices, Inc. 
All rights reserved.\n" - "#\n" - "# Permission is hereby granted, free of charge, to any person obtaining a copy\n" - "# of this software and associated documentation files (the \"Software\"), to deal\n" - "# in the Software without restriction, including without limitation the rights\n" - "# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - "# copies of the Software, and to permit persons to whom the Software is\n" - "# furnished to do so, subject to the following conditions:\n" - "#\n" - "# The above copyright notice and this permission notice shall be included in\n" - "# all copies or substantial portions of the Software.\n" - "#\n" - "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - "# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" - "# THE SOFTWARE.\n" - "##\n"; - - const string sImportant = "# IMPORTANT: Do not change this file manually: it is generated by hipify-clang --perl"; - const string tab = " "; - const string tab_2 = tab + tab; - const string tab_3 = tab_2 + tab; - const string tab_4 = tab_3 + tab; - const string tab_5 = tab_4 + tab; - const string tab_6 = tab_5 + tab; - const string endl_2 = "\n\n"; - const string endl_tab = "\n" + tab; - const string endl_tab_2 = "\n" + tab_2; - const string endl_tab_3 = "\n" + tab_3; - const string endl_tab_4 = "\n" + tab_4; - const string endl_tab_5 = "\n" + tab_5; - const string sub = "sub "; - const string my = "my "; - const string my_k = my + "$k = 0;"; - const string return_0 = "return 0;\n"; - const string return_k = "return $k;\n"; - const string while_ = "while "; - const string unless_ = 
"unless "; - const string foreach = "foreach "; - const string foreach_func = foreach + "$func (\n"; - const string print = "print STDERR "; - const string printf = "printf STDERR "; - const string no_warns = "no warnings qw/uninitialized/;"; - const string hipify_perl = "hipify-perl"; - - const string sCudaDevice = "cudaDevice"; - const string sCudaDeviceId = "cudaDeviceId"; - const string sCudaDevices = "cudaDevices"; - const string sCudaDevice_t = "cudaDevice_t"; - const string sCudaIDs = "cudaIDs"; - const string sCudaGridDim = "cudaGridDim"; - const string sCudaDimGrid = "cudaDimGrid"; - const string sCudaDimBlock = "cudaDimBlock"; - const string sCudaGradInput = "cudaGradInput"; - const string sCudaGradOutput = "cudaGradOutput"; - const string sCudaInput = "cudaInput"; - const string sCudaOutput = "cudaOutput"; - const string sCudaIndices = "cudaIndices"; - const string sCudaGaugeField = "cudaGaugeField"; - const string sCudaMom = "cudaMom"; - const string sCudaGauge = "cudaGauge"; - const string sCudaInGauge = "cudaInGauge"; - const string sCudaColorSpinorField = "cudaColorSpinorField"; - const string sCudaSiteLink = "cudaSiteLink"; - const string sCudaFatLink = "cudaFatLink"; - const string sCudaStaple = "cudaStaple"; - const string sCudaCloverField = "cudaCloverField"; - const string sCudaParam = "cudaParam"; - - const set Whitelist{ - {sCudaDevice}, {sCudaDevice_t}, {sCudaIDs}, {sCudaGridDim}, {sCudaDimGrid}, {sCudaDimBlock}, {sCudaDeviceId}, {sCudaDevices}, - {sCudaGradInput}, {sCudaGradOutput}, {sCudaInput}, {sCudaOutput}, {sCudaIndices}, {sCudaGaugeField}, {sCudaMom}, {sCudaGauge}, - {sCudaInGauge}, {sCudaColorSpinorField}, {sCudaSiteLink}, {sCudaFatLink}, {sCudaStaple}, {sCudaCloverField}, {sCudaParam} - }; - - void generateHeader(unique_ptr &streamPtr) { - *streamPtr.get() << "#!/usr/bin/perl -w" << endl_2; - *streamPtr.get() << sCopyright << endl; - *streamPtr.get() << sImportant << endl_2; - *streamPtr.get() << "#usage " << hipify_perl << " 
[OPTIONS] INPUT_FILE" << endl_2; - *streamPtr.get() << "use Getopt::Long;" << endl; - *streamPtr.get() << my << "$whitelist = \"\";" << endl; - *streamPtr.get() << my << "$fileName = \"\";" << endl; - *streamPtr.get() << my << "%ft;" << endl; - *streamPtr.get() << my << "%Tkernels;" << endl_2; - *streamPtr.get() << "GetOptions(" << endl; - *streamPtr.get() << tab << " \"examine\" => \\$examine # Combines -no-output and -print-stats options." << endl; - *streamPtr.get() << tab << ", \"inplace\" => \\$inplace # Modify input file inplace, replacing input with hipified output, save backup in .prehip file." << endl; - *streamPtr.get() << tab << ", \"no-output\" => \\$no_output # Don't write any translated output to stdout." << endl; - *streamPtr.get() << tab << ", \"print-stats\" => \\$print_stats # Print translation statistics." << endl; - *streamPtr.get() << tab << ", \"quiet-warnings\" => \\$quiet_warnings # Don't print warnings on unknown CUDA functions." << endl; - *streamPtr.get() << tab << ", \"whitelist=s\" => \\$whitelist # TODO: test it beforehand" << endl; - *streamPtr.get() << ");" << endl_2; - *streamPtr.get() << "$print_stats = 1 if $examine;" << endl; - *streamPtr.get() << "$no_output = 1 if $examine;" << endl_2; - *streamPtr.get() << "# Whitelist of cuda[A-Z] identifiers, which are commonly used in CUDA sources but don't map to any CUDA API:" << endl; - *streamPtr.get() << "@whitelist = ("; - unsigned int num = 0; - for (const string &m : Whitelist) { - *streamPtr.get() << endl_tab << (num ? 
", " : " ") << "\"" << m << "\""; - ++num; - } - *streamPtr.get() << endl << ");" << endl_2; - *streamPtr.get() << "push(@whitelist, split(',', $whitelist));" << endl_2; - } - - void generateStatFunctions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "totalStats" << " {" << endl; - *streamPtr.get() << tab << my << "%count = %{ shift() };" << endl; - *streamPtr.get() << tab << my << "$total = 0;" << endl; - *streamPtr.get() << tab << foreach << "$key (keys %count) {" << endl; - *streamPtr.get() << tab_2 << "$total += $count{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "return $total;" << endl << "};" << endl; - *streamPtr.get() << endl << sub << "printStats" << " {" << endl; - *streamPtr.get() << tab << my << "$label = shift();" << endl; - *streamPtr.get() << tab << my << "@statNames = @{ shift() };" << endl; - *streamPtr.get() << tab << my << "%counts = %{ shift() };" << endl; - *streamPtr.get() << tab << my << "$warnings = shift();" << endl; - *streamPtr.get() << tab << my << "$loc = shift();" << endl; - *streamPtr.get() << tab << my << "$total = totalStats(\\%counts);" << endl; - *streamPtr.get() << tab << printf << "\"%s %d CUDA->HIP refs ( \", $label, $total;" << endl; - *streamPtr.get() << tab << foreach << "$stat (@statNames) {" << endl; - *streamPtr.get() << tab_2 << printf << "\"%s:%d \", $stat, $counts{$stat};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << printf << "\")\\n warn:%d LOC:%d\", $warnings, $loc;" << endl << "}" << endl; - for (int i = 0; i < 2; ++i) { - *streamPtr.get() << endl << sub << (i ? "clearStats" : "addStats") << " {" << endl; - *streamPtr.get() << tab << my << "$dest_ref = shift();" << endl; - *streamPtr.get() << tab << my << (i ? "@statNames = @{ shift() };" : "%adder = %{ shift() };") << endl; - *streamPtr.get() << tab << foreach << (i ? "$stat(@statNames)" : "$key (keys %adder)") << " {" << endl; - *streamPtr.get() << tab_2 << "$dest_ref->" << (i ? 
"{$stat} = 0;" : "{$key} += $adder{$key};") << endl_tab << "}" << endl << "}" << endl; - } - } - - void generateSimpleSubstitutions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "simpleSubstitutions" << " {" << endl; - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (i == CONV_INCLUDE_CUDA_MAIN_H || i == CONV_INCLUDE) { - for (auto &ma : CUDA_INCLUDE_MAP) { - if (Statistics::isUnsupported(ma.second)) continue; - if (i == ma.second.type) { - string sCUDA = ma.first.str(); - string sHIP = ma.second.hipName.str(); - sCUDA = regex_replace(sCUDA, regex("/"), "\\/"); - sHIP = regex_replace(sHIP, regex("/"), "\\/"); - *streamPtr.get() << tab << "$ft{'" << counterNames[ma.second.type] << "'} += s/\\b" << sCUDA << "\\b/" << sHIP << "/g;" << endl; - } - } - } else { - for (auto &ma : CUDA_RENAMES_MAP()) { - if (Statistics::isUnsupported(ma.second)) continue; - if (i == ma.second.type) { - *streamPtr.get() << tab << "$ft{'" << counterNames[ma.second.type] << "'} += s/\\b" << ma.first.str() << "\\b/" << ma.second.hipName.str() << "/g;" << endl; - } - } - } - } - *streamPtr.get() << "}" << endl; - } - - void generateExternShared(unique_ptr &streamPtr) { - *streamPtr.get() << endl << "# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro" << endl; - *streamPtr.get() << sub << "transformExternShared" << " {" << endl; - *streamPtr.get() << tab << no_warns << endl; - *streamPtr.get() << tab << my_k << endl; - *streamPtr.get() << tab << "$k += s/extern\\s+([\\w\\(\\)]+)?\\s*__shared__\\s+([\\w:<>\\s]+)\\s+(\\w+)\\s*\\[\\s*\\]\\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;" << endl; - *streamPtr.get() << tab << "$ft{'extern_shared'} += $k;" << endl << "}" << endl; - } - - void generateKernelLaunch(unique_ptr &streamPtr) { - *streamPtr.get() << endl << "# CUDA Kernel Launch Syntax" << endl << sub << "transformKernelLaunch" << " {" << endl; - *streamPtr.get() << tab << no_warns << endl; - *streamPtr.get() << tab << my_k << endl_2; - - string s_k = "$k += 
s/([:|\\w]+)\\s*"; - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><>>(...) 
syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) 
syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << endl_2; - - *streamPtr.get() << tab << "if ($k) {" << endl; - *streamPtr.get() << tab_2 << "$ft{'kernel_launch'} += $k;" << endl; - *streamPtr.get() << tab_2 << "$Tkernels{$1}++;" << endl_tab << "}" << endl << "}" << endl; - } - - void generateCubNamespace(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "transformCubNamespace" << " {" << endl_tab << my_k << endl; - *streamPtr.get() << tab << "$k += s/using\\s*namespace\\s*cub/using namespace hipcub/g;" << endl; - *streamPtr.get() << tab << "$k += s/\\bcub::\\b/hipcub::/g;" << endl << tab << return_k << "}" << endl; - } - - void generateHostFunctions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "transformHostFunctions" << " {" << endl_tab << my_k << endl; - set &funcSet = DeviceSymbolFunctions0; - const string s0 = "$k += s/(?second.hipName.str() << "\""; - count++; - } - } - *streamPtr.get() << endl_tab << ")" << endl_tab << "{" << endl_tab_2; - switch (i) { - case 0: - default: *streamPtr.get() << s0 << sHIP_SYMBOL << "\\($2\\),/g" << endl; break; - case 1: *streamPtr.get() << s1 << sHIP_SYMBOL << "\\($3\\)$4/g;" << endl; break; - case 2: *streamPtr.get() << s0 << s_reinterpret_cast << "\\($2\\),/g" << endl; break; - case 3: *streamPtr.get() << s1 << s_reinterpret_cast << "\\($3\\)$4/g;" << endl; break; - } - *streamPtr.get() << tab << "}" << endl; - } - *streamPtr.get() << tab << return_k << "}" << endl; - } - - void generateDeviceFunctions(unique_ptr &streamPtr) { - unsigned int countUnsupported = 0; - unsigned int countSupported = 0; - stringstream sSupported; - stringstream sUnsupported; - for (auto &ma : CUDA_DEVICE_FUNC_MAP) { - bool isUnsupported = Statistics::isUnsupported(ma.second); - (isUnsupported ? 
sUnsupported : sSupported) << ((isUnsupported && countUnsupported) || (!isUnsupported && countSupported) ? ",\n" : "") << tab_2 << "\"" << ma.first.str() << "\""; - if (isUnsupported) countUnsupported++; - else countSupported++; - } - stringstream subCountSupported; - stringstream subWarnUnsupported; - stringstream subCommon; - string sCommon = tab + my_k + "\n" + tab + foreach_func; - subCountSupported << endl << sub << "countSupportedDeviceFunctions" << " {" << endl << (countSupported ? sCommon : tab + return_0); - subWarnUnsupported << endl << sub << "warnUnsupportedDeviceFunctions" << " {" << endl << (countUnsupported ? tab + my + "$line_num = shift;\n" + sCommon : tab + return_0); - if (countSupported) subCountSupported << sSupported.str() << endl_tab << ")" << endl; - if (countUnsupported) subWarnUnsupported << sUnsupported.str() << endl_tab << ")" << endl; - if (countSupported || countUnsupported) { - subCommon << tab << "{" << endl; - subCommon << tab_2 << "# match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...));" << endl; - subCommon << tab_2 << "# function with only global namespace qualifier '::' (aka ::umin(...)) should be treated as a device function (and warned as well as without such qualifier);" << endl; - subCommon << tab_2 << my << "$mt_namespace = m/(\\w+)::($func)\\s*\\(\\s*.*\\s*\\)/g;" << endl; - subCommon << tab_2 << my << "$mt = m/($func)\\s*\\(\\s*.*\\s*\\)/g;" << endl; - subCommon << tab_2 << "if ($mt && !$mt_namespace) {" << endl; - subCommon << tab_3 << "$k += $mt;" << endl; - } - if (countSupported) subCountSupported << subCommon.str(); - if (countUnsupported) { - subWarnUnsupported << subCommon.str(); - subWarnUnsupported << tab_3 << print << "\" warning: $fileName:$line_num: unsupported device function \\\"$func\\\": $_\\n\";" << endl; - } - if (countSupported || countUnsupported) sCommon = tab_2 + "}\n" + tab + "}\n" + tab + return_k; - if (countSupported) subCountSupported << 
sCommon; - if (countUnsupported) subWarnUnsupported << sCommon; - subCountSupported << "}" << endl; - subWarnUnsupported << "}" << endl; - *streamPtr.get() << subCountSupported.str(); - *streamPtr.get() << subWarnUnsupported.str(); - } - - bool generate(bool Generate) { - if (!Generate) return true; - string dstHipifyPerl = hipify_perl, dstHipifyPerlDir = OutputHipifyPerlDir; - error_code EC; - if (!dstHipifyPerlDir.empty()) { - string sOutputHipifyPerlDirAbsPath = getAbsoluteDirectoryPath(OutputHipifyPerlDir, EC, "output " + hipify_perl); - if (EC) return false; - dstHipifyPerl = sOutputHipifyPerlDirAbsPath + "/" + dstHipifyPerl; - } - SmallString<128> tmpFile; - EC = sys::fs::createTemporaryFile(dstHipifyPerl, hipify_perl, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - return false; - } - unique_ptr streamPtr = unique_ptr(new ofstream(tmpFile.c_str(), ios_base::trunc)); - generateHeader(streamPtr); - string sConv = my + "$apiCalls = "; - unsigned int exclude[3] = { CONV_DEVICE_FUNC, CONV_EXTERN_SHARED, CONV_KERNEL_LAUNCH }; - *streamPtr.get() << "@statNames = ("; - for (unsigned int i = 0; i < NUM_CONV_TYPES - 1; ++i) { - *streamPtr.get() << "\"" << counterNames[i] << "\", "; - if (any_of(exclude, exclude + 3, [&i](unsigned int x) { return x == i; })) continue; - sConv += "$ft{'" + string(counterNames[i]) + "'}" + (i < NUM_CONV_TYPES - 2 ? 
" + " : ";"); - } - if (sConv.back() == ' ') sConv = sConv.substr(0, sConv.size() - 3) + ";"; - *streamPtr.get() << "\"" << counterNames[NUM_CONV_TYPES - 1] << "\");" << endl; - generateStatFunctions(streamPtr); - generateSimpleSubstitutions(streamPtr); - generateExternShared(streamPtr); - generateKernelLaunch(streamPtr); - generateCubNamespace(streamPtr); - generateHostFunctions(streamPtr); - generateDeviceFunctions(streamPtr); - *streamPtr.get() << endl << "# Count of transforms in all files" << endl; - *streamPtr.get() << my << "%tt;" << endl; - *streamPtr.get() << "clearStats(\\%tt, \\@statNames);" << endl; - *streamPtr.get() << "$Twarnings = 0;" << endl; - *streamPtr.get() << "$TlineCount = 0;" << endl; - *streamPtr.get() << my << "%TwarningTags;" << endl; - *streamPtr.get() << my << "$fileCount = @ARGV;" << endl_2; - *streamPtr.get() << while_ << "(@ARGV) {" << endl; - *streamPtr.get() << tab << "$fileName=shift (@ARGV);" << endl; - *streamPtr.get() << tab << "if ($inplace) {" << endl; - *streamPtr.get() << tab_2 << my << "$file_prehip = \"$fileName\" . 
\".prehip\";" << endl; - *streamPtr.get() << tab_2 << my << "$infile;" << endl; - *streamPtr.get() << tab_2 << my << "$outfile;" << endl; - *streamPtr.get() << tab_2 << "if (-e $file_prehip) {" << endl; - *streamPtr.get() << tab_3 << "$infile = $file_prehip;" << endl; - *streamPtr.get() << tab_3 << "$outfile = $fileName;" << endl; - *streamPtr.get() << tab_2 << "} else {" << endl; - *streamPtr.get() << tab_3 << "system (\"cp $fileName $file_prehip\");" << endl; - *streamPtr.get() << tab_3 << "$infile = $file_prehip;" << endl; - *streamPtr.get() << tab_3 << "$outfile = $fileName;" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "open(INFILE,\"<\", $infile) or die \"error: could not open $infile\";" << endl; - *streamPtr.get() << tab_2 << "open(OUTFILE,\">\", $outfile) or die \"error: could not open $outfile\";" << endl; - *streamPtr.get() << tab_2 << "$OUTFILE = OUTFILE;" << endl; - *streamPtr.get() << tab << "} else {" << endl; - *streamPtr.get() << tab_2 << "open(INFILE,\"<\", $fileName) or die \"error: could not open $fileName\";" << endl; - *streamPtr.get() << tab_2 << "$OUTFILE = STDOUT;" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "# Count of transforms in this file" << endl; - *streamPtr.get() << tab << "clearStats(\\%ft, \\@statNames);" << endl; - *streamPtr.get() << tab << my << "$countIncludes = 0;" << endl; - *streamPtr.get() << tab << my << "$countKeywords = 0;" << endl; - *streamPtr.get() << tab << my << "$warnings = 0;" << endl; - *streamPtr.get() << tab << my << "%warningTags;" << endl; - *streamPtr.get() << tab << my << "$lineCount = 0;" << endl; - *streamPtr.get() << tab << "undef $/;" << endl; - *streamPtr.get() << tab << "# Read whole file at once, so we can match newlines" << endl; - *streamPtr.get() << tab << while_ << "() {" << endl; - *streamPtr.get() << tab_2 << "$countKeywords += m/__global__/;" << endl; - *streamPtr.get() << tab_2 << "$countKeywords += m/__shared__/;" << endl; - *streamPtr.get() << tab_2 << 
"simpleSubstitutions();" << endl; - *streamPtr.get() << tab_2 << "transformExternShared();" << endl; - *streamPtr.get() << tab_2 << "transformKernelLaunch();" << endl; - *streamPtr.get() << tab_2 << "transformCubNamespace();" << endl; - *streamPtr.get() << tab_2 << "if ($print_stats) {" << endl; - *streamPtr.get() << tab_3 << while_ << "(/(\\b(hip|HIP)([A-Z]|_)\\w+\\b)/g) {" << endl; - *streamPtr.get() << tab_4 << "$convertedTags{$1}++;" << endl_tab_3 << "}" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << my << "$hasDeviceCode = $countKeywords + $ft{'device_function'};" << endl; - *streamPtr.get() << tab_2 << unless_ << "($quiet_warnings) {" << endl; - *streamPtr.get() << tab_3 << "# Copy into array of lines, process line-by-line to show warnings" << endl; - *streamPtr.get() << tab_3 << "if ($hasDeviceCode or (/\\bcu|CU/) or (/<<<.*>>>/)) {" << endl; - *streamPtr.get() << tab_4 << my << "@lines = split /\\n/, $_;" << endl; - *streamPtr.get() << tab_4 << "# Copy the whole file" << endl; - *streamPtr.get() << tab_4 << my << "$tmp = $_;" << endl; - *streamPtr.get() << tab_4 << my << "$line_num = 0;" << endl; - *streamPtr.get() << tab_4 << foreach << "(@lines) {" << endl; - *streamPtr.get() << tab_5 << "$line_num++;" << endl; - *streamPtr.get() << tab_5 << "# Remove any whitelisted words" << endl; - *streamPtr.get() << tab_5 << foreach << "$w (@whitelist) {" << endl; - *streamPtr.get() << tab_6 << "s/\\b$w\\b/ZAP/" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << my << "$tag;" << endl; - *streamPtr.get() << tab_5 << "if ((/(\\bcuda[A-Z]\\w+)/) or (/<<<.*>>>/)) {" << endl; - *streamPtr.get() << tab_6 << "# Flag any remaining code that look like cuda API calls: may want to add these to hipify" << endl; - *streamPtr.get() << tab_6 << "$tag = (defined $1) ? 
$1 : \"Launch\";" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << "if (defined $tag) {" << endl; - *streamPtr.get() << tab_6 << "$warnings++;" << endl; - *streamPtr.get() << tab_6 << "$warningTags{$tag}++;" << endl; - *streamPtr.get() << tab_6 << print << "\" warning: $fileName:#$line_num : $_\\n\";" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << "$s = warnUnsupportedDeviceFunctions($line_num);" << endl; - *streamPtr.get() << tab_5 << "$warnings += $s;" << endl_tab_4 << "}" << endl; - *streamPtr.get() << tab_4 << "$_ = $tmp;" << endl_tab_3 << "}" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "if ($hasDeviceCode > 0) {" << endl; - *streamPtr.get() << tab_3 << "$ft{'device_function'} += countSupportedDeviceFunctions();" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "transformHostFunctions();" << endl; - *streamPtr.get() << tab_2 << "# TODO: would like to move this code outside loop but it uses $_ which contains the whole file" << endl; - *streamPtr.get() << tab_2 << unless_ << "($no_output) {" << endl; - *streamPtr.get() << tab_3 << sConv << endl; - *streamPtr.get() << tab_3 << my << "$kernStuff = $hasDeviceCode + $ft{'" << counterNames[CONV_KERNEL_LAUNCH] << "'} + $ft{'" << counterNames[CONV_DEVICE_FUNC] << "'};" << endl; - *streamPtr.get() << tab_3 << my << "$totalCalls = $apiCalls + $kernStuff;" << endl; - *streamPtr.get() << tab_3 << "$is_dos = m/\\r\\n$/;" << endl; - *streamPtr.get() << tab_3 << "if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {" << endl; - *streamPtr.get() << tab_4 << "# TODO: implement hipify-clang's logic with header files AMAP" << endl; - *streamPtr.get() << tab_4 << "print $OUTFILE '#include \"hip/hip_runtime.h\"' . ($is_dos ? 
\"\\r\\n\" : \"\\n\");" << endl_tab_3 << "}" << endl; - *streamPtr.get() << tab_3 << "print $OUTFILE \"$_\";" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "$lineCount = $_ =~ tr/\\n//;" << endl_tab << "}" << endl; - *streamPtr.get() << tab << my << "$totalConverted = totalStats(\\%ft);" << endl; - *streamPtr.get() << tab << "if (($totalConverted+$warnings) and $print_stats) {" << endl; - *streamPtr.get() << tab_2 << "printStats(\" info: converted\", \\@statNames, \\%ft, $warnings, $lineCount);" << endl; - *streamPtr.get() << tab_2 << print << "\" in '$fileName'\\n\";" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "# Update totals for all files" << endl; - *streamPtr.get() << tab << "addStats(\\%tt, \\%ft);" << endl; - *streamPtr.get() << tab << "$Twarnings += $warnings;" << endl; - *streamPtr.get() << tab << "$TlineCount += $lineCount;" << endl; - *streamPtr.get() << tab << foreach << "$key (keys %warningTags) {" << endl; - *streamPtr.get() << tab_2 << "$TwarningTags{$key} += $warningTags{$key};" << endl_tab << "}" << endl << "}" << endl; - *streamPtr.get() << "# Print total stats for all files processed:" << endl; - *streamPtr.get() << "if ($print_stats and ($fileCount > 1)) {" << endl; - *streamPtr.get() << tab << print << "\"\\n\";" << endl; - *streamPtr.get() << tab << "printStats(\" info: TOTAL-converted\", \\@statNames, \\%tt, $Twarnings, $TlineCount);" << endl; - *streamPtr.get() << tab << print << "\"\\n\";" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" warning: unconverted %s : %d\\n\", $key, $TwarningTags{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << my << "$kernelCnt = keys %Tkernels;" << endl; - *streamPtr.get() << tab << printf << "\" kernels (%d total) : \", $kernelCnt;" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } 
keys %Tkernels) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" %s(%d)\", $key, $Tkernels{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << print << "\"\\n\\n\";" << endl << "}" << endl; - *streamPtr.get() << "if ($print_stats) {" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" %s %d\\n\", $key, $convertedTags{$key};" << endl_tab << "}" << endl << "}" << endl; - streamPtr.get()->flush(); - bool ret = true; - EC = sys::fs::copy_file(tmpFile, dstHipifyPerl); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dstHipifyPerl << "\n"; - ret = false; - } - if (!SaveTemps) sys::fs::remove(tmpFile); - return ret; - } -} diff --git a/hipify-clang/src/CUDA2HIP_Python.cpp b/hipify-clang/src/CUDA2HIP_Python.cpp deleted file mode 100644 index fec138915d..0000000000 --- a/hipify-clang/src/CUDA2HIP_Python.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Path.h" -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "Statistics.h" - -using namespace llvm; - -namespace python { - - bool generate(bool Generate) { - if (!Generate) { - return true; - } - std::string dstPythonMap = "cuda_to_hip_mappings.py", dstPythonMapDir = OutputPythonMapDir; - std::error_code EC; - if (!dstPythonMapDir.empty()) { - std::string sOutputPythonMapDirAbsPath = getAbsoluteDirectoryPath(OutputPythonMapDir, EC, "output hipify-python map"); - if (EC) { - return false; - } - dstPythonMap = sOutputPythonMapDirAbsPath + "/" + dstPythonMap; - } - SmallString<128> tmpFile; - StringRef ext = "hipify-tmp"; - EC = sys::fs::createTemporaryFile(dstPythonMap, ext, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - return false; - } - std::unique_ptr pythonStreamPtr = std::unique_ptr(new std::ofstream(tmpFile.c_str(), std::ios_base::trunc)); - *pythonStreamPtr.get() << "import collections\n\n"; - *pythonStreamPtr.get() << "from pyHIPIFY.constants import *\n\n"; - *pythonStreamPtr.get() << "CUDA_RENAMES_MAP = collections.OrderedDict([\n"; - const std::string sHIP_UNS = ", HIP_UNSUPPORTED"; - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (i == CONV_INCLUDE_CUDA_MAIN_H || i == CONV_INCLUDE) { - for (auto &ma : CUDA_INCLUDE_MAP) { - if (i == ma.second.type) { - std::string sUnsupported; - if (Statistics::isUnsupported(ma.second)) { - sUnsupported = sHIP_UNS; - } - StringRef repName = Statistics::isToRoc(ma.second) ? 
ma.second.rocName : ma.second.hipName; - *pythonStreamPtr.get() << " (\"" << ma.first.str() << "\", (\"" << repName.str() << "\", " << counterTypes[i] << ", " << apiTypes[ma.second.apiType] << sUnsupported << ")),\n"; - } - } - } - else { - for (auto &ma : CUDA_RENAMES_MAP()) { - if (i == ma.second.type) { - std::string sUnsupported; - if (Statistics::isUnsupported(ma.second)) { - sUnsupported = sHIP_UNS; - } - StringRef repName = Statistics::isToRoc(ma.second) ? ma.second.rocName : ma.second.hipName; - *pythonStreamPtr.get() << " (\"" << ma.first.str() << "\", (\"" << repName.str() << "\", " << counterTypes[i] << ", " << apiTypes[ma.second.apiType] << sUnsupported << ")),\n"; - } - } - } - } - *pythonStreamPtr.get() << "])\n\n"; - *pythonStreamPtr.get() << "CUDA_TO_HIP_MAPPINGS = [CUDA_RENAMES_MAP, C10_MAPPINGS, PYTORCH_SPECIFIC_MAPPINGS]\n"; - pythonStreamPtr.get()->flush(); - bool ret = true; - EC = sys::fs::copy_file(tmpFile, dstPythonMap); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dstPythonMap << "\n"; - ret = false; - } - if (!SaveTemps) { - sys::fs::remove(tmpFile); - } - return true; - } -} diff --git a/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp b/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp deleted file mode 100644 index 525aa1e5f3..0000000000 --- a/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_RAND_FUNCTION_MAP{ - // RAND Host functions - {"curandCreateGenerator", {"hiprandCreateGenerator", "", CONV_LIB_FUNC, API_RAND}}, - {"curandCreateGeneratorHost", {"hiprandCreateGeneratorHost", "", CONV_LIB_FUNC, API_RAND}}, - {"curandCreatePoissonDistribution", {"hiprandCreatePoissonDistribution", "", CONV_LIB_FUNC, API_RAND}}, - {"curandDestroyDistribution", {"hiprandDestroyDistribution", "", CONV_LIB_FUNC, API_RAND}}, - {"curandDestroyGenerator", {"hiprandDestroyGenerator", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerate", {"hiprandGenerate", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLogNormal", {"hiprandGenerateLogNormal", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLogNormalDouble", {"hiprandGenerateLogNormalDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLongLong", {"hiprandGenerateLongLong", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGenerateNormal", {"hiprandGenerateNormal", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateNormalDouble", {"hiprandGenerateNormalDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGeneratePoisson", {"hiprandGeneratePoisson", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateSeeds", {"hiprandGenerateSeeds", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateUniform", {"hiprandGenerateUniform", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateUniformDouble", {"hiprandGenerateUniformDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGetDirectionVectors32", {"hiprandGetDirectionVectors32", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetDirectionVectors64", {"hiprandGetDirectionVectors64", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetProperty", {"hiprandGetProperty", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetScrambleConstants32", {"hiprandGetScrambleConstants32", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetScrambleConstants64", 
{"hiprandGetScrambleConstants64", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetVersion", {"hiprandGetVersion", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetGeneratorOffset", {"hiprandSetGeneratorOffset", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetGeneratorOrdering", {"hiprandSetGeneratorOrdering", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandSetPseudoRandomGeneratorSeed", {"hiprandSetPseudoRandomGeneratorSeed", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetQuasiRandomGeneratorDimensions", {"hiprandSetQuasiRandomGeneratorDimensions", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetStream", {"hiprandSetStream", "", CONV_LIB_FUNC, API_RAND}}, - {"curandMakeMTGP32Constants", {"hiprandMakeMTGP32Constants", "", CONV_LIB_FUNC, API_RAND}}, - {"curandMakeMTGP32KernelState", {"hiprandMakeMTGP32KernelState", "", CONV_LIB_FUNC, API_RAND}}, - - // RAND Device functions - {"curand", {"hiprand", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_init", {"hiprand_init", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal", {"hiprand_log_normal", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal_double", {"hiprand_log_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal2", {"hiprand_log_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal2_double", {"hiprand_log_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal4", {"hiprand_log_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal4_double", {"hiprand_log_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_mtgp32_single", {"hiprand_mtgp32_single", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_mtgp32_single_specific", {"hiprand_mtgp32_single_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_mtgp32_specific", {"hiprand_mtgp32_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_normal", {"hiprand_normal", "", CONV_LIB_DEVICE_FUNC, 
API_RAND}}, - {"curand_normal_double", {"hiprand_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal2", {"hiprand_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal2_double", {"hiprand_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal4", {"hiprand_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal4_double", {"hiprand_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform", {"hiprand_uniform", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform_double", {"hiprand_uniform_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform2_double", {"hiprand_uniform2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform4", {"hiprand_uniform4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform4_double", {"hiprand_uniform4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_discrete", {"hiprand_discrete", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_discrete4", {"hiprand_discrete4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_poisson", {"hiprand_poisson", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_poisson4", {"hiprand_poisson4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_Philox4x32_10", {"hiprand_Philox4x32_10", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence -}; diff --git a/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp b/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp deleted file mode 100644 index 67ecc93504..0000000000 --- a/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_RAND_TYPE_NAME_MAP{ - // RAND defines - {"CURAND_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - - // RAND Host types - {"curandStatus", {"hiprandStatus_t", "", CONV_TYPE, API_RAND}}, - {"curandStatus_t", {"hiprandStatus_t", "", CONV_TYPE, API_RAND}}, - {"curandRngType", {"hiprandRngType_t", "", CONV_TYPE, API_RAND}}, - {"curandRngType_t", {"hiprandRngType_t", "", CONV_TYPE, API_RAND}}, - {"curandGenerator_st", {"hiprandGenerator_st", "", CONV_TYPE, API_RAND}}, - {"curandGenerator_t", {"hiprandGenerator_t", "", CONV_TYPE, API_RAND}}, - {"curandDirectionVectorSet", {"hiprandDirectionVectorSet_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDirectionVectorSet_t", {"hiprandDirectionVectorSet_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandOrdering", {"hiprandOrdering_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandOrdering_t", {"hiprandOrdering_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2V_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2V_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 
HIP_UNSUPPORTED}}, - {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_t", {"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "", CONV_TYPE, API_RAND}}, - {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "", CONV_TYPE, API_RAND}}, - {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND}}, - {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - - // RAND types for Device functions - {"curandStateMtgp32", {"hiprandStateMtgp32", "", CONV_TYPE, API_RAND}}, - {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "", CONV_TYPE, API_RAND}}, - {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol64", {"hiprandStateSobol64", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol64_t", {"hiprandStateSobol64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "", CONV_TYPE, 
API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol32", {"hiprandStateSobol32", "", CONV_TYPE, API_RAND}}, - {"curandStateSobol32_t", {"hiprandStateSobol32_t", "", CONV_TYPE, API_RAND}}, - {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "", CONV_TYPE, API_RAND}}, - {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "", CONV_TYPE, API_RAND}}, - {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "", CONV_TYPE, API_RAND}}, - {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "", CONV_TYPE, API_RAND}}, - {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND}}, - {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND}}, - {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND}}, - {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND}}, - - // RAND function call status types (enum curandStatus) - {"CURAND_STATUS_SUCCESS", {"HIPRAND_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_VERSION_MISMATCH", {"HIPRAND_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_NOT_INITIALIZED", {"HIPRAND_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_ALLOCATION_FAILED", {"HIPRAND_STATUS_ALLOCATION_FAILED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_TYPE_ERROR", {"HIPRAND_STATUS_TYPE_ERROR", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_OUT_OF_RANGE", {"HIPRAND_STATUS_OUT_OF_RANGE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_LENGTH_NOT_MULTIPLE", {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_DOUBLE_PRECISION_REQUIRED", {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_LAUNCH_FAILURE", {"HIPRAND_STATUS_LAUNCH_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_PREEXISTING_FAILURE", {"HIPRAND_STATUS_PREEXISTING_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - 
{"CURAND_STATUS_INITIALIZATION_FAILED", {"HIPRAND_STATUS_INITIALIZATION_FAILED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_ARCH_MISMATCH", {"HIPRAND_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_INTERNAL_ERROR", {"HIPRAND_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_RAND}}, - - // RAND generator types (enum curandRngType) - {"CURAND_RNG_TEST", {"HIPRAND_RNG_TEST", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_DEFAULT", {"HIPRAND_RNG_PSEUDO_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_XORWOW", {"HIPRAND_RNG_PSEUDO_XORWOW", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MRG32K3A", {"HIPRAND_RNG_PSEUDO_MRG32K3A", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MTGP32", {"HIPRAND_RNG_PSEUDO_MTGP32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MT19937", {"HIPRAND_RNG_PSEUDO_MT19937", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_PHILOX4_32_10", {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_DEFAULT", {"HIPRAND_RNG_QUASI_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SOBOL32", {"HIPRAND_RNG_QUASI_SOBOL32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL32", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SOBOL64", {"HIPRAND_RNG_QUASI_SOBOL64", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL64", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", "", CONV_NUMERIC_LITERAL, API_RAND}}, - - // RAND ordering of results in memory (enum curandOrdering) - {"CURAND_ORDERING_PSEUDO_BEST", {"HIPRAND_ORDERING_PSEUDO_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_PSEUDO_DEFAULT", {"HIPRAND_ORDERING_PSEUDO_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_PSEUDO_SEEDED", {"HIPRAND_ORDERING_PSEUDO_SEEDED", 
"", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_QUASI_DEFAULT", {"HIPRAND_ORDERING_QUASI_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - - // RAND choice of direction vector set (enum curandDirectionVectorSet) - {"CURAND_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - - // RAND method (enum curandMethod) - {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_3RD", {"HIPRAND_3RD", "", 
CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp deleted file mode 100644 index 25fb930905..0000000000 --- a/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp +++ /dev/null @@ -1,693 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all CUDA Runtime API functions -const std::map CUDA_RUNTIME_FUNCTION_MAP{ - // 5.1. 
Device Management - // no analogue - {"cudaChooseDevice", {"hipChooseDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetAttribute - {"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetByPCIBusId - {"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceGetCacheConfig", {"hipDeviceGetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetLimit - {"cudaDeviceGetLimit", {"hipDeviceGetLimit", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetNvSciSyncAttributes - {"cudaDeviceGetNvSciSyncAttributes", {"hipDeviceGetNvSciSyncAttributes", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDeviceGetP2PAttribute - {"cudaDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDeviceGetPCIBusId - {"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetSharedMemConfig - {"cudaDeviceGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetStreamPriorityRange - {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceReset", {"hipDeviceReset", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSetLimit - {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSetSharedMemConfig - {"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSynchronize - {"cudaDeviceSynchronize", {"hipDeviceSynchronize", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGet - // NOTE: cuDeviceGet has no attr: int ordinal - {"cudaGetDevice", {"hipGetDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetCount - {"cudaGetDeviceCount", {"hipGetDeviceCount", "", CONV_DEVICE, API_RUNTIME}}, - // 
cuCtxGetFlags - // TODO: rename to hipGetDeviceFlags - {"cudaGetDeviceFlags", {"hipCtxGetFlags", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuDeviceGetProperties due to different attributes: CUdevprop and cudaDeviceProp - {"cudaGetDeviceProperties", {"hipGetDeviceProperties", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcCloseMemHandle - {"cudaIpcCloseMemHandle", {"hipIpcCloseMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcGetEventHandle - {"cudaIpcGetEventHandle", {"hipIpcGetEventHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcGetMemHandle - {"cudaIpcGetMemHandle", {"hipIpcGetMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcOpenEventHandle - {"cudaIpcOpenEventHandle", {"hipIpcOpenEventHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcOpenMemHandle - {"cudaIpcOpenMemHandle", {"hipIpcOpenMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaSetDevice", {"hipSetDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetFlags - {"cudaSetDeviceFlags", {"hipSetDeviceFlags", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaSetValidDevices", {"hipSetValidDevices", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.2. Thread Management [DEPRECATED] - // no analogue - {"cudaThreadExit", {"hipDeviceReset", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadGetCacheConfig", {"hipDeviceGetCacheConfig", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadGetLimit", {"hipThreadGetLimit", "", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaThreadSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadSetLimit", {"hipThreadSetLimit", "", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuCtxSynchronize - {"cudaThreadSynchronize", {"hipDeviceSynchronize", "", CONV_THREAD, API_RUNTIME}}, - - // 5.3. 
Error Handling - // no analogue - // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cudaGetErrorName", {"hipGetErrorName", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cudaGetErrorString", {"hipGetErrorString", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - {"cudaGetLastError", {"hipGetLastError", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - {"cudaPeekAtLastError", {"hipPeekAtLastError", "", CONV_ERROR, API_RUNTIME}}, - - // 5.4. Stream Management - // cuStreamAddCallback - {"cudaStreamAddCallback", {"hipStreamAddCallback", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamAttachMemAsync - {"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamBeginCapture - {"cudaStreamBeginCapture", {"hipStreamBeginCapture", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuStreamCreate due to different signatures - {"cudaStreamCreate", {"hipStreamCreate", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamCreate - {"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamCreateWithPriority - {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamDestroy - {"cudaStreamDestroy", {"hipStreamDestroy", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamEndCapture - {"cudaStreamEndCapture", {"hipStreamEndCapture", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamGetFlags - {"cudaStreamGetFlags", {"hipStreamGetFlags", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamGetPriority - {"cudaStreamGetPriority", {"hipStreamGetPriority", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamIsCapturing - {"cudaStreamIsCapturing", {"hipStreamIsCapturing", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamGetCaptureInfo - {"cudaStreamGetCaptureInfo", {"hipStreamGetCaptureInfo", "", 
CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamQuery - {"cudaStreamQuery", {"hipStreamQuery", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamSynchronize - {"cudaStreamSynchronize", {"hipStreamSynchronize", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamWaitEvent - {"cudaStreamWaitEvent", {"hipStreamWaitEvent", "", CONV_STREAM, API_RUNTIME}}, - // cuThreadExchangeStreamCaptureMode - {"cudaThreadExchangeStreamCaptureMode", {"hipThreadExchangeStreamCaptureMode", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.5.Event Management - // no analogue - // NOTE: Not equal to cuEventCreate due to different signatures - {"cudaEventCreate", {"hipEventCreate", "", CONV_EVENT, API_RUNTIME}}, - // cuEventCreate - {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", "", CONV_EVENT, API_RUNTIME}}, - // cuEventDestroy - {"cudaEventDestroy", {"hipEventDestroy", "", CONV_EVENT, API_RUNTIME}}, - // cuEventElapsedTime - {"cudaEventElapsedTime", {"hipEventElapsedTime", "", CONV_EVENT, API_RUNTIME}}, - // cuEventQuery - {"cudaEventQuery", {"hipEventQuery", "", CONV_EVENT, API_RUNTIME}}, - // cuEventRecord - {"cudaEventRecord", {"hipEventRecord", "", CONV_EVENT, API_RUNTIME}}, - // cuEventSynchronize - {"cudaEventSynchronize", {"hipEventSynchronize", "", CONV_EVENT, API_RUNTIME}}, - - // 5.6. 
External Resource Interoperability - // cuDestroyExternalMemory - {"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDestroyExternalSemaphore - {"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuExternalMemoryGetMappedBuffer - {"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuExternalMemoryGetMappedMipmappedArray - {"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuImportExternalMemory - {"cudaImportExternalMemory", {"hipImportExternalMemory", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuImportExternalSemaphore - {"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuSignalExternalSemaphoresAsync - {"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuWaitExternalSemaphoresAsync - {"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.7. 
Execution Control - // no analogue - {"cudaFuncGetAttributes", {"hipFuncGetAttributes", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaFuncSetAttribute", {"hipFuncSetAttribute", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuFuncSetCacheConfig due to different signatures - {"cudaFuncSetCacheConfig", {"hipFuncSetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuFuncSetSharedMemConfig due to different signatures - {"cudaFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetParameterBuffer", {"hipGetParameterBuffer", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetParameterBufferV2", {"hipGetParameterBufferV2", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuLaunchCooperativeKernel due to different signatures - {"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuLaunchCooperativeKernelMultiDevice due to different signatures - {"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME}}, - // cuLaunchHostFunc - {"cudaLaunchHostFunc", {"hipLaunchHostFunc", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuLaunchKernel due to different signatures - {"cudaLaunchKernel", {"hipLaunchKernel", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaSetDoubleForDevice", {"hipSetDoubleForDevice", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaSetDoubleForHost", {"hipSetDoubleForHost", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.8. 
Occupancy - // cuOccupancyMaxActiveBlocksPerMultiprocessor - {"cudaOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxPotentialBlockSize - {"cudaOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxPotentialBlockSizeWithFlags - {"cudaOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaOccupancyMaxPotentialBlockSizeVariableSMem", {"hipOccupancyMaxPotentialBlockSizeVariableSMem", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - - // Former 5.9. Execution Control [DEPRECATED] - // NOTE: Removed in CUDA 10.1 - // no analogue - {"cudaConfigureCall", {"hipConfigureCall", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cudaLaunch due to different signatures - {"cudaLaunch", {"hipLaunchByPtr", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaSetupArgument", {"hipSetupArgument", "", CONV_EXECUTION, API_RUNTIME}}, - - // 5.9. 
Memory Management - // no analogue - {"cudaArrayGetInfo", {"hipArrayGetInfo", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemFree - {"cudaFree", {"hipFree", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaFreeArray", {"hipFreeArray", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemFreeHost - {"cudaFreeHost", {"hipHostFree", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures - {"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayGetLevel due to different signatures - {"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetSymbolAddress", {"hipGetSymbolAddress", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaGetSymbolSize", {"hipGetSymbolSize", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostAlloc - {"cudaHostAlloc", {"hipHostMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostGetDevicePointer - {"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostGetFlags - {"cudaHostGetFlags", {"hipHostGetFlags", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostRegister - {"cudaHostRegister", {"hipHostRegister", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostUnregister - {"cudaHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAlloc - {"cudaMalloc", {"hipMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMalloc3D", {"hipMalloc3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMalloc3DArray", {"hipMalloc3DArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMallocArray", {"hipMallocArray", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostAlloc - {"cudaMallocHost", {"hipHostMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAllocManaged - {"cudaMallocManaged", {"hipMallocManaged", "", 
CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayCreate due to different signatures - {"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemAllocPitch due to different signatures - {"cudaMallocPitch", {"hipMallocPitch", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAdvise - {"cudaMemAdvise", {"hipMemAdvise", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy due to different signatures - {"cudaMemcpy", {"hipMemcpy", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy2D due to different signatures - {"cudaMemcpy2D", {"hipMemcpy2D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy2DAsync due to different signatures - {"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy3D due to different signatures - {"cudaMemcpy3D", {"hipMemcpy3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy3DAsync due to different signatures - {"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy3DPeer due to different signatures - {"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no 
analogue - // NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures - {"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpyAsync due to different signatures - {"cudaMemcpyAsync", {"hipMemcpyAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpyPeer due to different signatures - {"cudaMemcpyPeer", {"hipMemcpyPeer", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpyPeerAsync due to different signatures - {"cudaMemcpyPeerAsync", {"hipMemcpyPeerAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemGetInfo - {"cudaMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_RUNTIME}}, - // TODO: double check cuMemPrefetchAsync - {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemRangeGetAttribute - {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemRangeGetAttributes - {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemsetD32 - hipMemsetD32 - {"cudaMemset", {"hipMemset", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset2D", {"hipMemset2D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset2DAsync", {"hipMemset2DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset3D", {"hipMemset3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset3DAsync", {"hipMemset3DAsync", 
"", CONV_MEMORY, API_RUNTIME}}, - // cuMemsetD32Async - {"cudaMemsetAsync", {"hipMemsetAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaExtent", {"make_hipExtent", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaPitchedPtr", {"make_hipPitchedPtr", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaPos", {"make_hipPos", "", CONV_MEMORY, API_RUNTIME}}, - - // 5.10. Memory Management [DEPRECATED] - // no analogue - // NOTE: Not equal to cuMemcpyAtoA due to different signatures - {"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaMemcpyFromArray", {"hipMemcpyFromArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaMemcpyToArray", {"hipMemcpyToArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToArrayAsync", {"hipMemcpyToArrayAsync", "", CONV_MEMORY, API_RUNTIME}}, - - // 5.11.Unified Addressing - // no analogue - // NOTE: Not equal to cuPointerGetAttributes due to different signatures - {"cudaPointerGetAttributes", {"hipPointerGetAttributes", "", CONV_ADDRESSING, API_RUNTIME}}, - - // 5.12. Peer Device Memory Access - // cuDeviceCanAccessPeer - {"cudaDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", "", CONV_PEER, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuCtxDisablePeerAccess due to different signatures - {"cudaDeviceDisablePeerAccess", {"hipDeviceDisablePeerAccess", "", CONV_PEER, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuCtxEnablePeerAccess due to different signatures - {"cudaDeviceEnablePeerAccess", {"hipDeviceEnablePeerAccess", "", CONV_PEER, API_RUNTIME}}, - - // 5.13. 
OpenGL Interoperability - // cuGLGetDevices - {"cudaGLGetDevices", {"hipGLGetDevices", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsGLRegisterBuffer - {"cudaGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsGLRegisterImage - {"cudaGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuWGLGetDevice - {"cudaWGLGetDevice", {"hipWGLGetDevice", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.14. OpenGL Interoperability [DEPRECATED] - // no analogue - // NOTE: Not equal to cuGLMapBufferObject due to different signatures - {"cudaGLMapBufferObject", {"hipGLMapBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuGLMapBufferObjectAsync due to different signatures - {"cudaGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLRegisterBufferObject - {"cudaGLRegisterBufferObject", {"hipGLRegisterBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLSetBufferObjectMapFlags - {"cudaGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGLSetGLDevice", {"hipGLSetGLDevice", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnmapBufferObject - {"cudaGLUnmapBufferObject", {"hipGLUnmapBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnmapBufferObjectAsync - {"cudaGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnregisterBufferObject - {"cudaGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.15. 
Direct3D 9 Interoperability - // cuD3D9GetDevice - {"cudaD3D9GetDevice", {"hipD3D9GetDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9GetDevices - {"cudaD3D9GetDevices", {"hipD3D9GetDevices", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9GetDirect3DDevice - {"cudaD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D9SetDirect3DDevice", {"hipD3D9SetDirect3DDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D9RegisterResource - {"cudaGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.16.Direct3D 9 Interoperability[DEPRECATED] - // cuD3D9MapResources - {"cudaD3D9MapResources", {"hipD3D9MapResources", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9RegisterResource - {"cudaD3D9RegisterResource", {"hipD3D9RegisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedArray - {"cudaD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPitch - {"cudaD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedPointer - {"cudaD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedSize - {"cudaD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetSurfaceDimensions - {"cudaD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceSetMapFlags - {"cudaD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9UnmapResources - {"cudaD3D9UnmapResources", {"hipD3D9UnmapResources", "", 
CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9UnregisterResource - {"cudaD3D9UnregisterResource", {"hipD3D9UnregisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.17. Direct3D 10 Interoperability - // cuD3D10GetDevice - {"cudaD3D10GetDevice", {"hipD3D10GetDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10GetDevices - {"cudaD3D10GetDevices", {"hipD3D10GetDevices", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D10RegisterResource - {"cudaGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.18. Direct3D 10 Interoperability [DEPRECATED] - // cudaD3D10GetDirect3DDevice - {"cudaD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10MapResources - {"cudaD3D10MapResources", {"hipD3D10MapResources", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10RegisterResource - {"cudaD3D10RegisterResource", {"hipD3D10RegisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedArray - {"cudaD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPitch - {"cudaD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedPointer - {"cudaD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedSize - {"cudaD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetSurfaceDimensions - {"cudaD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceSetMapFlags - {"cudaD3D10ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", "", 
CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D10SetDirect3DDevice", {"hipD3D10SetDirect3DDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10UnmapResources - {"cudaD3D10UnmapResources", {"hipD3D10UnmapResources", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10UnregisterResource - {"cudaD3D10UnregisterResource", {"hipD3D10UnregisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.19. Direct3D 11 Interoperability - // cuD3D11GetDevice - {"cudaD3D11GetDevice", {"hipD3D11GetDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D11GetDevices - {"cudaD3D11GetDevices", {"hipD3D11GetDevices", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D11RegisterResource - {"cudaGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.20. Direct3D 11 Interoperability [DEPRECATED] - // cuD3D11GetDirect3DDevice - {"cudaD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D11SetDirect3DDevice", {"hipD3D11SetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.21. VDPAU Interoperability - // cuGraphicsVDPAURegisterOutputSurface - {"cudaGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsVDPAURegisterVideoSurface - {"cudaGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuVDPAUGetDevice - {"cudaVDPAUGetDevice", {"hipVDPAUGetDevice", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaVDPAUSetVDPAUDevice", {"hipVDPAUSetDevice", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.22. 
EGL Interoperability - // cuEGLStreamConsumerAcquireFrame - {"cudaEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerConnect - {"cudaEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerConnectWithFlags - {"cudaEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerDisconnect - {"cudaEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerReleaseFrame - {"cudaEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerConnect - {"cudaEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerDisconnect - {"cudaEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerPresentFrame - {"cudaEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerReturnFrame - {"cudaEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEventCreateFromEGLSync - {"cudaEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsEGLRegisterImage - {"cudaGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedEglFrame - {"cudaGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.23. 
Graphics Interoperability - // cuGraphicsMapResources - {"cudaGraphicsMapResources", {"hipGraphicsMapResources", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedMipmappedArray - {"cudaGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedPointer - {"cudaGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceSetMapFlags - {"cudaGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsSubResourceGetMappedArray - {"cudaGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsUnmapResources - {"cudaGraphicsUnmapResources", {"hipGraphicsUnmapResources", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsUnregisterResource - {"cudaGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.24. 
Texture Reference Management [DEPRECATED] - // no analogue - {"cudaBindTexture", {"hipBindTexture", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaBindTexture2D", {"hipBindTexture2D", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaBindTextureToArray", {"hipBindTextureToArray", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Unsupported yet on NVCC path - {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaCreateChannelDesc", {"hipCreateChannelDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaGetChannelDesc", {"hipGetChannelDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaGetTextureAlignmentOffset", {"hipGetTextureAlignmentOffset", "", CONV_TEXTURE, API_RUNTIME}}, - // TODO: double check cuModuleGetTexRef - // NOTE: Unsupported yet on NVCC path - {"cudaGetTextureReference", {"hipGetTextureReference", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaUnbindTexture", {"hipUnbindTexture", "", CONV_TEXTURE, API_RUNTIME}}, - - // 5.25. Surface Reference Management [DEPRECATED] - // no analogue - {"cudaBindSurfaceToArray", {"hipBindSurfaceToArray", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - // TODO: double check cuModuleGetSurfRef - {"cudaGetSurfaceReference", {"hipGetSurfaceReference", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.26. 
Texture Object Management - // no analogue - // NOTE: Not equal to cuTexObjectCreate due to different signatures - {"cudaCreateTextureObject", {"hipCreateTextureObject", "", CONV_TEXTURE, API_RUNTIME}}, - // cuTexObjectDestroy - {"cudaDestroyTextureObject", {"hipDestroyTextureObject", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuTexObjectGetResourceDesc due to different signatures - {"cudaGetTextureObjectResourceDesc", {"hipGetTextureObjectResourceDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // cuTexObjectGetResourceViewDesc - {"cudaGetTextureObjectResourceViewDesc", {"hipGetTextureObjectResourceViewDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures - {"cuTexObjectGetTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME}}, - - // 5.27. Surface Object Management - // no analogue - // NOTE: Not equal to cuSurfObjectCreate due to different signatures - {"cudaCreateSurfaceObject", {"hipCreateSurfaceObject", "", CONV_SURFACE, API_RUNTIME}}, - // cuSurfObjectDestroy - {"cudaDestroySurfaceObject", {"hipDestroySurfaceObject", "", CONV_SURFACE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuSurfObjectGetResourceDesc due to different signatures - {"cudaGetSurfaceObjectResourceDesc", {"hipGetSurfaceObjectResourceDesc", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.28.Version Management - // cuDriverGetVersion - {"cudaDriverGetVersion", {"hipDriverGetVersion", "", CONV_VERSION, API_RUNTIME}}, - // no analogue - {"cudaRuntimeGetVersion", {"hipRuntimeGetVersion", "", CONV_VERSION, API_RUNTIME}}, - - // 5.29. 
Graph Management - // cuGraphAddChildGraphNode - {"cudaGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddDependencies - {"cudaGraphAddDependencies", {"hipGraphAddDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddEmptyNode - {"cudaGraphAddEmptyNode", {"hipGraphAddEmptyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddHostNode - {"cudaGraphAddHostNode", {"hipGraphAddHostNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddKernelNode - {"cudaGraphAddKernelNode", {"hipGraphAddKernelNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddMemcpyNode - {"cudaGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddMemsetNode - {"cudaGraphAddMemsetNode", {"hipGraphAddMemsetNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphChildGraphNodeGetGraph - {"cudaGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphClone - {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphCreate - {"cudaGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphDestroy - {"cudaGraphDestroy", {"hipGraphDestroy", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphDestroyNode - {"cudaGraphDestroyNode", {"hipGraphDestroyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecDestroy - {"cudaGraphExecDestroy", {"hipGraphExecDestroy", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetEdges - {"cudaGraphGetEdges", {"hipGraphGetEdges", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetNodes - {"cudaGraphGetNodes", {"hipGraphGetNodes", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetRootNodes - {"cudaGraphGetRootNodes", {"hipGraphGetRootNodes", "", CONV_GRAPH, API_RUNTIME, 
HIP_UNSUPPORTED}}, - // cuGraphHostNodeGetParams - {"cudaGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphHostNodeSetParams - {"cudaGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphInstantiate - {"cudaGraphInstantiate", {"hipGraphInstantiate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecKernelNodeSetParams - {"cudaGraphExecKernelNodeSetParams", {"hipGraphExecKernelNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecMemcpyNodeSetParams - {"cudaGraphExecMemcpyNodeSetParams", {"hipGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecMemsetNodeSetParams - {"cudaGraphExecMemsetNodeSetParams", {"hipGraphExecMemsetNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecHostNodeSetParams - {"cudaGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecUpdate - {"cudaGraphExecUpdate", {"hipGraphExecUpdate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphKernelNodeGetParams - {"cudaGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphKernelNodeSetParams - {"cudaGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphLaunch - {"cudaGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemcpyNodeGetParams - {"cudaGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemcpyNodeSetParams - {"cudaGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemsetNodeGetParams - {"cudaGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_RUNTIME, 
HIP_UNSUPPORTED}}, - // cuGraphMemsetNodeSetParams - {"cudaGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeFindInClone - {"cudaGraphNodeFindInClone", {"hipGraphNodeFindInClone", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetDependencies - {"cudaGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetDependentNodes - {"cudaGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetType - {"cudaGraphNodeGetType", {"hipGraphNodeGetType", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphRemoveDependencies - {"cudaGraphRemoveDependencies", {"hipGraphRemoveDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.32. Profiler Control - // cuProfilerInitialize - {"cudaProfilerInitialize", {"hipProfilerInitialize", "", CONV_PROFILER, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuProfilerStart - {"cudaProfilerStart", {"hipProfilerStart", "", CONV_PROFILER, API_RUNTIME}}, - // cuProfilerStop - {"cudaProfilerStop", {"hipProfilerStop", "", CONV_PROFILER, API_RUNTIME}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp b/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp deleted file mode 100644 index 6eb9bfb2be..0000000000 --- a/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp +++ /dev/null @@ -1,1426 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA RUNTIME API types to the corresponding HIP types -const std::map CUDA_RUNTIME_TYPE_NAME_MAP { - - // 1. 
Structs - - // no analogue - {"cudaChannelFormatDesc", {"hipChannelFormatDesc", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaDeviceProp", {"hipDeviceProp_t", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaEglFrame", {"hipEglFrame", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaEglFrame_st", {"hipEglFrame", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaEglPlaneDesc", {"hipEglPlaneDesc", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaEglPlaneDesc_st", {"hipEglPlaneDesc", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaExtent", {"hipExtent", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_EXTERNAL_MEMORY_BUFFER_DESC - {"cudaExternalMemoryBufferDesc", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_MEMORY_HANDLE_DESC - {"cudaExternalMemoryHandleDesc", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC - {"cudaExternalMemoryMipmappedArrayDesc", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC - {"cudaExternalSemaphoreHandleDesc", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS - {"cudaExternalSemaphoreSignalParams", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS - {"cudaExternalSemaphoreWaitParams", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaFuncAttributes", {"hipFuncAttributes", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_HOST_NODE_PARAMS - {"cudaHostNodeParams", {"HIP_HOST_NODE_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUipcEventHandle - {"cudaIpcEventHandle_t", {"ihipIpcEventHandle_t", "", CONV_TYPE, 
API_RUNTIME}}, - // CUipcEventHandle_st - {"cudaIpcEventHandle_st", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUipcMemHandle - {"cudaIpcMemHandle_t", {"hipIpcMemHandle_t", "", CONV_TYPE, API_RUNTIME}}, - // CUipcMemHandle_st - {"cudaIpcMemHandle_st", {"hipIpcMemHandle_st", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_KERNEL_NODE_PARAMS - {"cudaKernelNodeParams", {"hipKernelNodeParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - // CUDA_LAUNCH_PARAMS struct differs - {"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: HIP struct is bigger and contains cudaMemcpy3DParms only in the beginning - {"cudaMemcpy3DParms", {"hipMemcpy3DParms", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaMemcpy3DPeerParms", {"hipMemcpy3DPeerParms", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_MEMSET_NODE_PARAMS - {"cudaMemsetParams", {"hipMemsetParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaPitchedPtr", {"hipPitchedPtr", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaPointerAttributes", {"hipPointerAttribute_t", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaPos", {"hipPos", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: CUDA_RESOURCE_DESC struct differs - {"cudaResourceDesc", {"hipResourceDesc", "", CONV_TYPE, API_RUNTIME}}, - - // NOTE: CUDA_RESOURCE_VIEW_DESC has reserved bytes in the end - {"cudaResourceViewDesc", {"hipResourceViewDesc", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: CUDA_TEXTURE_DESC differs - {"cudaTextureDesc", {"hipTextureDesc", "", CONV_TYPE, API_RUNTIME}}, - - // NOTE: the same struct and its name - {"CUuuid_st", {"hipUUID", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // NOTE: possibly CUsurfref is analogue - {"surfaceReference", {"hipSurfaceReference", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUevent_st - {"CUevent_st", {"ihipEvent_t", 
"", CONV_TYPE, API_RUNTIME}}, - // CUevent - {"cudaEvent_t", {"hipEvent_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUextMemory_st - {"CUexternalMemory_st", {"hipExtMemory_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUexternalMemory - {"cudaExternalMemory_t", {"hipExternalMemory", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUextSemaphore_st - {"CUexternalSemaphore_st", {"hipExtSemaphore_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUexternalSemaphore - {"cudaExternalSemaphore_t", {"hipExternalSemaphore", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUgraph_st - {"CUgraph_st", {"hipGraph_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraph - {"cudaGraph_t", {"hipGraph", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same -CUgraphExec_st - {"CUgraphExec_st", {"hipGraphExec_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphExec - {"cudaGraphExec_t", {"hipGraphExec", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUgraphicsResource_st - {"cudaGraphicsResource", {"hipGraphicsResource_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphicsResource - {"cudaGraphicsResource_t", {"hipGraphicsResource_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUgraphNode_st - {"CUgraphNode_st", {"hipGraphNode_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphNode - {"cudaGraphNode_t", {"hipGraphNode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUeglStreamConnection_st - {"CUeglStreamConnection_st", {"hipEglStreamConnection", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUeglStreamConnection - {"cudaEglStreamConnection", {"hipEglStreamConnection", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUarray_st - {"cudaArray", {"hipArray", "", CONV_TYPE, API_RUNTIME}}, - // CUarray - {"cudaArray_t", {"hipArray_t", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaArray_const_t", {"hipArray_const_t", "", CONV_TYPE, 
API_RUNTIME}}, - - // CUmipmappedArray_st - {"cudaMipmappedArray", {"hipMipmappedArray", "", CONV_TYPE, API_RUNTIME}}, - // CUmipmappedArray - {"cudaMipmappedArray_t", {"hipMipmappedArray_t", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaMipmappedArray_const_t", {"hipMipmappedArray_const_t", "", CONV_TYPE, API_RUNTIME}}, - - // the same - CUstream_st - {"CUstream_st", {"ihipStream_t", "", CONV_TYPE, API_RUNTIME}}, - // CUstream - {"cudaStream_t", {"hipStream_t", "", CONV_TYPE, API_RUNTIME}}, - - // 3. Enums - - // no analogue - {"cudaCGScope", {"hipCGScope", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaCGScope enum values - {"cudaCGScopeInvalid", {"hipCGScopeInvalid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaCGScopeGrid", {"hipCGScopeGrid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - {"cudaCGScopeMultiGrid", {"hipCGScopeMultiGrid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"cudaChannelFormatKind", {"hipChannelFormatKind", "", CONV_TYPE, API_RUNTIME}}, - // cudaChannelFormatKind enum values - {"cudaChannelFormatKindSigned", {"hipChannelFormatKindSigned", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaChannelFormatKindUnsigned", {"hipChannelFormatKindUnsigned", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaChannelFormatKindFloat", {"hipChannelFormatKindFloat", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaChannelFormatKindNone", {"hipChannelFormatKindNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUcomputemode - {"cudaComputeMode", {"hipComputeMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaComputeMode enum values - // CU_COMPUTEMODE_DEFAULT - {"cudaComputeModeDefault", {"hipComputeModeDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_COMPUTEMODE_EXCLUSIVE - {"cudaComputeModeExclusive", {"hipComputeModeExclusive", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_COMPUTEMODE_PROHIBITED - 
{"cudaComputeModeProhibited", {"hipComputeModeProhibited", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_COMPUTEMODE_EXCLUSIVE_PROCESS - {"cudaComputeModeExclusiveProcess", {"hipComputeModeExclusiveProcess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUdevice_attribute - {"cudaDeviceAttr", {"hipDeviceAttribute_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaDeviceAttr enum values - // CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK - {"cudaDevAttrMaxThreadsPerBlock", {"hipDeviceAttributeMaxThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X - {"cudaDevAttrMaxBlockDimX", {"hipDeviceAttributeMaxBlockDimX", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y - {"cudaDevAttrMaxBlockDimY", {"hipDeviceAttributeMaxBlockDimY", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z - {"cudaDevAttrMaxBlockDimZ", {"hipDeviceAttributeMaxBlockDimZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X - {"cudaDevAttrMaxGridDimX", {"hipDeviceAttributeMaxGridDimX", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y - {"cudaDevAttrMaxGridDimY", {"hipDeviceAttributeMaxGridDimY", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z - {"cudaDevAttrMaxGridDimZ", {"hipDeviceAttributeMaxGridDimZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK - {"cudaDevAttrMaxSharedMemoryPerBlock", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 - // CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY - {"cudaDevAttrTotalConstantMemory", {"hipDeviceAttributeTotalConstantMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 - // CU_DEVICE_ATTRIBUTE_WARP_SIZE - {"cudaDevAttrWarpSize", {"hipDeviceAttributeWarpSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 10 - // 
CU_DEVICE_ATTRIBUTE_MAX_PITCH - {"cudaDevAttrMaxPitch", {"hipDeviceAttributeMaxPitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 11 - // CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK - {"cudaDevAttrMaxRegistersPerBlock", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 12 - // CU_DEVICE_ATTRIBUTE_CLOCK_RATE - {"cudaDevAttrClockRate", {"hipDeviceAttributeClockRate", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 - // CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT - {"cudaDevAttrTextureAlignment", {"hipDeviceAttributeTextureAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 14 - // CU_DEVICE_ATTRIBUTE_GPU_OVERLAP - // NOTE: Is not deprecated as CUDA Driver's API analogue CU_DEVICE_ATTRIBUTE_GPU_OVERLAP - {"cudaDevAttrGpuOverlap", {"hipDeviceAttributeGpuOverlap", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 15 - // CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT - {"cudaDevAttrMultiProcessorCount", {"hipDeviceAttributeMultiprocessorCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 16 - // CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT - {"cudaDevAttrKernelExecTimeout", {"hipDeviceAttributeKernelExecTimeout", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 17 - // CU_DEVICE_ATTRIBUTE_INTEGRATED - {"cudaDevAttrIntegrated", {"hipDeviceAttributeIntegrated", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 18 - // CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY - {"cudaDevAttrCanMapHostMemory", {"hipDeviceAttributeCanMapHostMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 19 - // CU_DEVICE_ATTRIBUTE_COMPUTE_MODE - {"cudaDevAttrComputeMode", {"hipDeviceAttributeComputeMode", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 20 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH - {"cudaDevAttrMaxTexture1DWidth", {"hipDeviceAttributeMaxTexture1DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 21 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH - {"cudaDevAttrMaxTexture2DWidth", {"hipDeviceAttributeMaxTexture2DWidth", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 22 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT - {"cudaDevAttrMaxTexture2DHeight", {"hipDeviceAttributeMaxTexture2DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 23 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH - {"cudaDevAttrMaxTexture3DWidth", {"hipDeviceAttributeMaxTexture3DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 24 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT - {"cudaDevAttrMaxTexture3DHeight", {"hipDeviceAttributeMaxTexture3DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 25 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH - {"cudaDevAttrMaxTexture3DDepth", {"hipDeviceAttributeMaxTexture3DDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 26 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH - {"cudaDevAttrMaxTexture2DLayeredWidth", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT - {"cudaDevAttrMaxTexture2DLayeredHeight", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS - {"cudaDevAttrMaxTexture2DLayeredLayers", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 29 - // CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT - {"cudaDevAttrSurfaceAlignment", {"hipDeviceAttributeSurfaceAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 30 - // CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS - {"cudaDevAttrConcurrentKernels", {"hipDeviceAttributeConcurrentKernels", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 31 - // CU_DEVICE_ATTRIBUTE_ECC_ENABLED - {"cudaDevAttrEccEnabled", {"hipDeviceAttributeEccEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 32 - // CU_DEVICE_ATTRIBUTE_PCI_BUS_ID - {"cudaDevAttrPciBusId", {"hipDeviceAttributePciBusId", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 33 - // 
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID - {"cudaDevAttrPciDeviceId", {"hipDeviceAttributePciDeviceId", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 34 - // CU_DEVICE_ATTRIBUTE_TCC_DRIVER - {"cudaDevAttrTccDriver", {"hipDeviceAttributeTccDriver", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 35 - // CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE - {"cudaDevAttrMemoryClockRate", {"hipDeviceAttributeMemoryClockRate", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 36 - // CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH - {"cudaDevAttrGlobalMemoryBusWidth", {"hipDeviceAttributeMemoryBusWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 37 - // CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE - {"cudaDevAttrL2CacheSize", {"hipDeviceAttributeL2CacheSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 38 - // CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR - {"cudaDevAttrMaxThreadsPerMultiProcessor", {"hipDeviceAttributeMaxThreadsPerMultiProcessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 39 - // CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT - {"cudaDevAttrAsyncEngineCount", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 40 - // CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING - {"cudaDevAttrUnifiedAddressing", {"hipDeviceAttributeUnifiedAddressing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 41 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH - {"cudaDevAttrMaxTexture1DLayeredWidth", {"hipDeviceAttributeMaxTexture1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 42 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS - {"cudaDevAttrMaxTexture1DLayeredLayers", {"hipDeviceAttributeMaxTexture1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // 44 - no - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH - {"cudaDevAttrMaxTexture2DGatherWidth", {"hipDeviceAttributeMaxTexture2DGatherWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT - {"cudaDevAttrMaxTexture2DGatherHeight", {"hipDeviceAttributeMaxTexture2DGatherHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE - {"cudaDevAttrMaxTexture3DWidthAlt", {"hipDeviceAttributeMaxTexture3DWidthAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 47 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE - {"cudaDevAttrMaxTexture3DHeightAlt", {"hipDeviceAttributeMaxTexture3DHeightAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 48 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE - {"cudaDevAttrMaxTexture3DDepthAlt", {"hipDeviceAttributeMaxTexture3DDepthAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID - {"cudaDevAttrPciDomainId", {"hipDeviceAttributePciDomainId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 50 - // CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT - {"cudaDevAttrTexturePitchAlignment", {"hipDeviceAttributeTexturePitchAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 51 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH - {"cudaDevAttrMaxTextureCubemapWidth", {"hipDeviceAttributeMaxTextureCubemapWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 52 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH - {"cudaDevAttrMaxTextureCubemapLayeredWidth", {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 53 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS - {"cudaDevAttrMaxTextureCubemapLayeredLayers", {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 54 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH - {"cudaDevAttrMaxSurface1DWidth", {"hipDeviceAttributeMaxSurface1DWidth", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 55 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH - {"cudaDevAttrMaxSurface2DWidth", {"hipDeviceAttributeMaxSurface2DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 56 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT - {"cudaDevAttrMaxSurface2DHeight", {"hipDeviceAttributeMaxSurface2DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 57 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH - {"cudaDevAttrMaxSurface3DWidth", {"hipDeviceAttributeMaxSurface3DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 58 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT - {"cudaDevAttrMaxSurface3DHeight", {"hipDeviceAttributeMaxSurface3DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 59 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH - {"cudaDevAttrMaxSurface3DDepth", {"hipDeviceAttributeMaxSurface3DDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 60 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH - {"cudaDevAttrMaxSurface1DLayeredWidth", {"hipDeviceAttributeMaxSurface1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 61 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS - {"cudaDevAttrMaxSurface1DLayeredLayers", {"hipDeviceAttributeMaxSurface1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 62 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH - {"cudaDevAttrMaxSurface2DLayeredWidth", {"hipDeviceAttributeMaxSurface2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 63 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT - {"cudaDevAttrMaxSurface2DLayeredHeight", {"hipDeviceAttributeMaxSurface2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LA YERS - {"cudaDevAttrMaxSurface2DLayeredLayers", 
{"hipDeviceAttributeMaxSurface2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH - {"cudaDevAttrMaxSurfaceCubemapWidth", {"hipDeviceAttributeMaxSurfaceCubemapWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH - {"cudaDevAttrMaxSurfaceCubemapLayeredWidth", {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS - {"cudaDevAttrMaxSurfaceCubemapLayeredLayers", {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH - {"cudaDevAttrMaxTexture1DLinearWidth", {"hipDeviceAttributeMaxTexture1DLinearWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH - {"cudaDevAttrMaxTexture2DLinearWidth", {"hipDeviceAttributeMaxTexture2DLinearWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 70 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT - {"cudaDevAttrMaxTexture2DLinearHeight", {"hipDeviceAttributeMaxTexture2DLinearHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 71 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH - {"cudaDevAttrMaxTexture2DLinearPitch", {"hipDeviceAttributeMaxTexture2DLinearPitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 72 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH - {"cudaDevAttrMaxTexture2DMipmappedWidth", {"hipDeviceAttributeMaxTexture2DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 73 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT - {"cudaDevAttrMaxTexture2DMipmappedHeight", {"hipDeviceAttributeMaxTexture2DMipmappedHeight", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME, HIP_UNSUPPORTED}}, // 74 - // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR - {"cudaDevAttrComputeCapabilityMajor", {"hipDeviceAttributeComputeCapabilityMajor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 75 - // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR - {"cudaDevAttrComputeCapabilityMinor", {"hipDeviceAttributeComputeCapabilityMinor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 76 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH - {"cudaDevAttrMaxTexture1DMipmappedWidth", {"hipDeviceAttributeMaxTexture1DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 77 - // CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED - {"cudaDevAttrStreamPrioritiesSupported", {"hipDeviceAttributeStreamPrioritiesSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 78 - // CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED - {"cudaDevAttrGlobalL1CacheSupported", {"hipDeviceAttributeGlobalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 79 - // CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED - {"cudaDevAttrLocalL1CacheSupported", {"hipDeviceAttributeLocalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 80 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR - {"cudaDevAttrMaxSharedMemoryPerMultiprocessor", {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 81 - // CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR - {"cudaDevAttrMaxRegistersPerMultiprocessor", {"hipDeviceAttributeMaxRegistersPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 82 - // CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY - {"cudaDevAttrManagedMemory", {"hipDeviceAttributeManagedMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 83 - // CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD - {"cudaDevAttrIsMultiGpuBoard", {"hipDeviceAttributeIsMultiGpuBoard", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 84 - // 
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID - {"cudaDevAttrMultiGpuBoardGroupID", {"hipDeviceAttributeMultiGpuBoardGroupID", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 85 - // CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED - {"cudaDevAttrHostNativeAtomicSupported", {"hipDeviceAttributeHostNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 86 - // CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO - {"cudaDevAttrSingleToDoublePrecisionPerfRatio", {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 87 - // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS - {"cudaDevAttrPageableMemoryAccess", {"hipDeviceAttributePageableMemoryAccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 88 - // CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS - {"cudaDevAttrConcurrentManagedAccess", {"hipDeviceAttributeConcurrentManagedAccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 89 - // CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED - {"cudaDevAttrComputePreemptionSupported", {"hipDeviceAttributeComputePreemptionSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 90 - // CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM - {"cudaDevAttrCanUseHostPointerForRegisteredMem", {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 91 - // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS - {"cudaDevAttrReserved92", {"hipDeviceAttributeCanUseStreamMemOps", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 92 - // CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS - {"cudaDevAttrReserved93", {"hipDeviceAttributeCanUse64BitStreamMemOps", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 93 - // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR - {"cudaDevAttrReserved94", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 94 - // CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH - {"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 95 - // CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH - {"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 96 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN - {"cudaDevAttrMaxSharedMemoryPerBlockOptin", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 97 - // CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES - {"cudaDevAttrCanFlushRemoteWrites", {"hipDeviceAttributeCanFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 98 - // CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED - {"cudaDevAttrHostRegisterSupported", {"hipDeviceAttributeHostRegisterSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 99 - // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES - {"cudaDevAttrPageableMemoryAccessUsesHostPageTables", {"hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 100 - // CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST - {"cudaDevAttrDirectManagedMemAccessFromHost", {"hipDeviceAttributeDirectManagedMemAccessFromHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 101 - - // CUdevice_P2PAttribute - {"cudaDeviceP2PAttr", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaDeviceP2PAttr enum values - // CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01 - {"cudaDevP2PAttrPerformanceRank", {"hipDeviceP2PAttributePerformanceRank", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02 - {"cudaDevP2PAttrAccessSupported", {"hipDeviceP2PAttributeAccessSupported", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03 - {"cudaDevP2PAttrNativeAtomicSupported", {"hipDeviceP2PAttributeNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04 - {"cudaDevP2PAttrCudaArrayAccessSupported", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - - // cudaEGL.h - presented only on Linux in nvidia-cuda-dev package - // CUeglColorFormat - {"cudaEglColorFormat", {"hipEglColorFormat", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglColorFormat enum values - // CU_EGL_COLOR_FORMAT_YUV420_PLANAR = 0x00 - {"cudaEglColorFormatYUV420Planar", {"hipEglColorFormatYUV420Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR= 0x01 - {"cudaEglColorFormatYUV420SemiPlanar ", {"hipEglColorFormatYUV420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EGL_COLOR_FORMAT_YUV422_PLANAR = 0x02 - {"cudaEglColorFormatYUV422Planar", {"hipEglColorFormatYUV422Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR = 0x03 - {"cudaEglColorFormatYUV422SemiPlanar", {"hipEglColorFormatYUV422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EGL_COLOR_FORMAT_RGB = 0x04 - {"cudaEglColorFormatRGB", {"hipEglColorFormatRGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EGL_COLOR_FORMAT_BGR = 0x05 - {"cudaEglColorFormatBGR", {"hipEglColorFormatBGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EGL_COLOR_FORMAT_ARGB = 0x06 - {"cudaEglColorFormatARGB", {"hipEglColorFormatARGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EGL_COLOR_FORMAT_RGBA = 0x07 - {"cudaEglColorFormatRGBA", 
{"hipEglColorFormatRGBA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EGL_COLOR_FORMAT_L = 0x08 - {"cudaEglColorFormatL", {"hipEglColorFormatL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - // CU_EGL_COLOR_FORMAT_R = 0x09 - {"cudaEglColorFormatR", {"hipEglColorFormatR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 - // CU_EGL_COLOR_FORMAT_YUV444_PLANAR = 0x0A - {"cudaEglColorFormatYUV444Planar", {"hipEglColorFormatYUV444Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 - // CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR = 0x0B - {"cudaEglColorFormatYUV444SemiPlanar", {"hipEglColorFormatYUV444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 11 - // CU_EGL_COLOR_FORMAT_YUYV_422 = 0x0C - {"cudaEglColorFormatYUYV422", {"hipEglColorFormatYUYV422", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 - // CU_EGL_COLOR_FORMAT_UYVY_422 = 0x0D - {"cudaEglColorFormatUYVY422", {"hipEglColorFormatUYVY422", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 13 - // CU_EGL_COLOR_FORMAT_ABGR = 0x0E - {"cudaEglColorFormatABGR", {"hipEglColorFormatABGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 14 - // CU_EGL_COLOR_FORMAT_BGRA = 0x0F - {"cudaEglColorFormatBGRA", {"hipEglColorFormatBGRA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 15 - // CU_EGL_COLOR_FORMAT_A = 0x10 - {"cudaEglColorFormatA", {"hipEglColorFormatA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 16 - // CU_EGL_COLOR_FORMAT_RG = 0x11 - {"cudaEglColorFormatRG", {"hipEglColorFormatRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 17 - // CU_EGL_COLOR_FORMAT_AYUV = 0x12 - {"cudaEglColorFormatAYUV", {"hipEglColorFormatAYUV", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 18 - // CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR = 0x13 - {"cudaEglColorFormatYVU444SemiPlanar", {"hipEglColorFormatYVU444SemiPlanar", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 19 - // CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR = 0x14 - {"cudaEglColorFormatYVU422SemiPlanar", {"hipEglColorFormatYVU422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 20 - // CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR = 0x15 - {"cudaEglColorFormatYVU420SemiPlanar", {"hipEglColorFormatYVU420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 21 - // CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR = 0x16 - {"cudaEglColorFormatY10V10U10_444SemiPlanar", {"hipEglColorFormatY10V10U10_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 22 - // CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR = 0x17 - {"cudaEglColorFormatY10V10U10_420SemiPlanar", {"hipEglColorFormatY10V10U10_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 23 - // CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR = 0x18 - {"cudaEglColorFormatY12V12U12_444SemiPlanar", {"hipEglColorFormatY12V12U12_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 24 - // CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR = 0x19 - {"cudaEglColorFormatY12V12U12_420SemiPlanar", {"hipEglColorFormatY12V12U12_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 25 - // CU_EGL_COLOR_FORMAT_VYUY_ER = 0x1A - {"cudaEglColorFormatVYUY_ER", {"hipEglColorFormatVYUY_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 26 - // CU_EGL_COLOR_FORMAT_UYVY_ER = 0x1B - {"cudaEglColorFormatUYVY_ER", {"hipEglColorFormatUYVY_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // CU_EGL_COLOR_FORMAT_YUYV_ER = 0x1C - {"cudaEglColorFormatYUYV_ER", {"hipEglColorFormatYUYV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // CU_EGL_COLOR_FORMAT_YVYU_ER = 0x1D - {"cudaEglColorFormatYVYU_ER", {"hipEglColorFormatYVYU_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 29 - // CU_EGL_COLOR_FORMAT_YUV_ER = 
0x1E - {"cudaEglColorFormatYUV_ER", {"hipEglColorFormatYUV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 30 - // CU_EGL_COLOR_FORMAT_YUVA_ER = 0x1F - {"cudaEglColorFormatYUVA_ER", {"hipEglColorFormatYUVA_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 31 - // CU_EGL_COLOR_FORMAT_AYUV_ER = 0x20 - {"cudaEglColorFormatAYUV_ER", {"hipEglColorFormatAYUV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 32 - // CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER = 0x21 - {"cudaEglColorFormatYUV444Planar_ER", {"hipEglColorFormatYUV444Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 33 - // CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER = 0x22 - {"cudaEglColorFormatYUV422Planar_ER", {"hipEglColorFormatYUV422Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 34 - // CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER = 0x23 - {"cudaEglColorFormatYUV420Planar_ER", {"hipEglColorFormatYUV420Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 35 - // CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER = 0x24 - {"cudaEglColorFormatYUV444SemiPlanar_ER", {"hipEglColorFormatYUV444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 36 - // CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER = 0x25 - {"cudaEglColorFormatYUV422SemiPlanar_ER", {"hipEglColorFormatYUV422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 37 - // CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER = 0x26 - {"cudaEglColorFormatYUV420SemiPlanar_ER", {"hipEglColorFormatYUV420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 38 - // CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER = 0x27 - {"cudaEglColorFormatYVU444Planar_ER", {"hipEglColorFormatYVU444Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 39 - // CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER = 0x28 - {"cudaEglColorFormatYVU422Planar_ER", {"hipEglColorFormatYVU422Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 40 - // CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER = 0x29 - {"cudaEglColorFormatYVU420Planar_ER", {"hipEglColorFormatYVU420Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 41 - // CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER = 0x2A - {"cudaEglColorFormatYVU444SemiPlanar_ER", {"hipEglColorFormatYVU444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 42 - // CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER = 0x2B - {"cudaEglColorFormatYVU422SemiPlanar_ER", {"hipEglColorFormatYVU422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER = 0x2C - {"cudaEglColorFormatYVU420SemiPlanar_ER", {"hipEglColorFormatYVU420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 44 - // CU_EGL_COLOR_FORMAT_BAYER_RGGB = 0x2D - {"cudaEglColorFormatBayerRGGB", {"hipEglColorFormatBayerRGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // CU_EGL_COLOR_FORMAT_BAYER_BGGR = 0x2E - {"cudaEglColorFormatBayerBGGR", {"hipEglColorFormatBayerBGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // CU_EGL_COLOR_FORMAT_BAYER_GRBG = 0x2F - {"cudaEglColorFormatBayerGRBG", {"hipEglColorFormatBayerGRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 47 - // CU_EGL_COLOR_FORMAT_BAYER_GBRG = 0x30 - {"cudaEglColorFormatBayerGBRG", {"hipEglColorFormatBayerGBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 48 - // CU_EGL_COLOR_FORMAT_BAYER10_RGGB = 0x31 - {"cudaEglColorFormatBayer10RGGB", {"hipEglColorFormatBayer10RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // CU_EGL_COLOR_FORMAT_BAYER10_BGGR = 0x32 - {"cudaEglColorFormatBayer10BGGR", {"hipEglColorFormatBayer10BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 50 - // CU_EGL_COLOR_FORMAT_BAYER10_GRBG = 0x33 - {"cudaEglColorFormatBayer10GRBG", {"hipEglColorFormatBayer10GRBG", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 51 - // CU_EGL_COLOR_FORMAT_BAYER10_GBRG = 0x34 - {"cudaEglColorFormatBayer10GBRG", {"hipEglColorFormatBayer10GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 52 - // CU_EGL_COLOR_FORMAT_BAYER12_RGGB = 0x35 - {"cudaEglColorFormatBayer12RGGB", {"hipEglColorFormatBayer12RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 53 - // CU_EGL_COLOR_FORMAT_BAYER12_BGGR = 0x36 - {"cudaEglColorFormatBayer12BGGR", {"hipEglColorFormatBayer12BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 54 - // CU_EGL_COLOR_FORMAT_BAYER12_GRBG = 0x37 - {"cudaEglColorFormatBayer12GRBG", {"hipEglColorFormatBayer12GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 55 - // CU_EGL_COLOR_FORMAT_BAYER12_GBRG = 0x38 - {"cudaEglColorFormatBayer12GBRG", {"hipEglColorFormatBayer12GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 56 - // CU_EGL_COLOR_FORMAT_BAYER14_RGGB = 0x39 - {"cudaEglColorFormatBayer14RGGB", {"hipEglColorFormatBayer14RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 57 - // CU_EGL_COLOR_FORMAT_BAYER14_BGGR = 0x3A - {"cudaEglColorFormatBayer14BGGR", {"hipEglColorFormatBayer14BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 58 - // CU_EGL_COLOR_FORMAT_BAYER14_GRBG = 0x3B - {"cudaEglColorFormatBayer14GRBG", {"hipEglColorFormatBayer14GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 59 - // CU_EGL_COLOR_FORMAT_BAYER14_GBRG = 0x3C - {"cudaEglColorFormatBayer14GBRG", {"hipEglColorFormatBayer14GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 60 - // CU_EGL_COLOR_FORMAT_BAYER20_RGGB = 0x3D - {"cudaEglColorFormatBayer20RGGB", {"hipEglColorFormatBayer20RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 61 - // CU_EGL_COLOR_FORMAT_BAYER20_BGGR = 0x3E - {"cudaEglColorFormatBayer20BGGR", {"hipEglColorFormatBayer20BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 62 - // CU_EGL_COLOR_FORMAT_BAYER20_GRBG = 0x3F - {"cudaEglColorFormatBayer20GRBG", {"hipEglColorFormatBayer20GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 63 - // CU_EGL_COLOR_FORMAT_BAYER20_GBRG = 0x40 - {"cudaEglColorFormatBayer20GBRG", {"hipEglColorFormatBayer20GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // CU_EGL_COLOR_FORMAT_YVU444_PLANAR = 0x41 - {"cudaEglColorFormatYVU444Planar", {"hipEglColorFormatYVU444Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // CU_EGL_COLOR_FORMAT_YVU422_PLANAR = 0x42 - {"cudaEglColorFormatYVU422Planar", {"hipEglColorFormatYVU422Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // CU_EGL_COLOR_FORMAT_YVU420_PLANAR = 0x43 - {"cudaEglColorFormatYVU420Planar", {"hipEglColorFormatYVU420Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB = 0x44 - {"cudaEglColorFormatBayerIspRGGB", {"hipEglColorFormatBayerIspRGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR = 0x45 - {"cudaEglColorFormatBayerIspBGGR", {"hipEglColorFormatBayerIspBGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG = 0x46 - {"cudaEglColorFormatBayerIspGRBG", {"hipEglColorFormatBayerIspGRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 70 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG = 0x47 - {"cudaEglColorFormatBayerIspGBRG", {"hipEglColorFormatBayerIspGBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 71 - - // CUeglFrameType - {"cudaEglFrameType", {"hipEglFrameType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglFrameType enum values - // CU_EGL_FRAME_TYPE_ARRAY - {"cudaEglFrameTypeArray", {"hipEglFrameTypeArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_EGL_FRAME_TYPE_PITCH - {"cudaEglFrameTypePitch", 
{"hipEglFrameTypePitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUeglResourceLocationFlags - {"cudaEglResourceLocationFlags", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglResourceLocationFlagss enum values - // CU_EGL_RESOURCE_LOCATION_SYSMEM - {"cudaEglResourceLocationSysmem", {"hipEglResourceLocationSysmem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_EGL_RESOURCE_LOCATION_VIDMEM - {"cudaEglResourceLocationVidmem", {"hipEglResourceLocationVidmem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - - // CUresult - {"cudaError", {"hipError_t", "", CONV_TYPE, API_RUNTIME}}, - {"cudaError_t", {"hipError_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaError enum values - // CUDA_SUCCESS - {"cudaSuccess", {"hipSuccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CUDA_ERROR_INVALID_VALUE - {"cudaErrorInvalidValue", {"hipErrorInvalidValue", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CUDA_ERROR_OUT_OF_MEMORY - {"cudaErrorMemoryAllocation", {"hipErrorOutOfMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CUDA_ERROR_NOT_INITIALIZED - {"cudaErrorInitializationError", {"hipErrorNotInitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - // CUDA_ERROR_DEINITIALIZED - {"cudaErrorCudartUnloading", {"hipErrorDeinitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - // CUDA_ERROR_PROFILER_DISABLED - {"cudaErrorProfilerDisabled", {"hipErrorProfilerDisabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 - // Deprecated since CUDA 5.0 - // CUDA_ERROR_PROFILER_NOT_INITIALIZED - {"cudaErrorProfilerNotInitialized", {"hipErrorProfilerNotInitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 - // Deprecated since CUDA 5.0 - // CUDA_ERROR_PROFILER_ALREADY_STARTED - {"cudaErrorProfilerAlreadyStarted", {"hipErrorProfilerAlreadyStarted", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 - // Deprecated since CUDA 5.0 - // 
CUDA_ERROR_PROFILER_ALREADY_STOPPED - {"cudaErrorProfilerAlreadyStopped", {"hipErrorProfilerAlreadyStopped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 - // no analogue - {"cudaErrorInvalidConfiguration", {"hipErrorInvalidConfiguration", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 - // no analogue - {"cudaErrorInvalidPitchValue", {"hipErrorInvalidPitchValue", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 - // no analogue - {"cudaErrorInvalidSymbol", {"hipErrorInvalidSymbol", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 - // Deprecated since CUDA 10.1 - // no analogue - {"cudaErrorInvalidHostPointer", {"hipErrorInvalidHostPointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 16 - // Deprecated since CUDA 10.1 - // no analogue - {"cudaErrorInvalidDevicePointer", {"hipErrorInvalidDevicePointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 17 - // no analogue - {"cudaErrorInvalidTexture", {"hipErrorInvalidTexture", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 18 - // no analogue - {"cudaErrorInvalidTextureBinding", {"hipErrorInvalidTextureBinding", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 19 - // no analogue - {"cudaErrorInvalidChannelDescriptor", {"hipErrorInvalidChannelDescriptor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 20 - // no analogue - {"cudaErrorInvalidMemcpyDirection", {"hipErrorInvalidMemcpyDirection", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 21 - // no analogue - {"cudaErrorAddressOfConstant", {"hipErrorAddressOfConstant", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 22 - // no analogue - {"cudaErrorTextureFetchFailed", {"hipErrorTextureFetchFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 23 - // no analogue - {"cudaErrorTextureNotBound", {"hipErrorTextureNotBound", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 24 - // no analogue - {"cudaErrorSynchronizationError", {"hipErrorSynchronizationError", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 25 - // no analogue - {"cudaErrorInvalidFilterSetting", {"hipErrorInvalidFilterSetting", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 26 - // no analogue - {"cudaErrorInvalidNormSetting", {"hipErrorInvalidNormSetting", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // no analogue - {"cudaErrorMixedDeviceExecution", {"hipErrorMixedDeviceExecution", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // Deprecated since CUDA 4.1 - // no analogue - {"cudaErrorNotYetImplemented", {"hipErrorNotYetImplemented", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 31 - // Deprecated since CUDA 3.1 - // no analogue - {"cudaErrorMemoryValueTooLarge", {"hipErrorMemoryValueTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 32 - // no analogue - {"cudaErrorInsufficientDriver", {"hipErrorInsufficientDriver", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 35 - // no analogue - {"cudaErrorInvalidSurface", {"hipErrorInvalidSurface", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 37 - // no analogue - {"cudaErrorDuplicateVariableName", {"hipErrorDuplicateVariableName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // no analogue - {"cudaErrorDuplicateTextureName", {"hipErrorDuplicateTextureName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 44 - // no analogue - {"cudaErrorDuplicateSurfaceName", {"hipErrorDuplicateSurfaceName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // no analogue - {"cudaErrorDevicesUnavailable", {"hipErrorDevicesUnavailable", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // no analogue - {"cudaErrorIncompatibleDriverContext", {"hipErrorIncompatibleDriverContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // no analogue - {"cudaErrorMissingConfiguration", {"hipErrorMissingConfiguration", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 52 - // no analogue - {"cudaErrorPriorLaunchFailure", {"hipErrorPriorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 53 - // no analogue - {"cudaErrorLaunchMaxDepthExceeded", {"hipErrorLaunchMaxDepthExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // no analogue - {"cudaErrorLaunchFileScopedTex", {"hipErrorLaunchFileScopedTex", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // no analogue - {"cudaErrorLaunchFileScopedSurf", {"hipErrorLaunchFileScopedSurf", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // no analogue - {"cudaErrorSyncDepthExceeded", {"hipErrorSyncDepthExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // no analogue - {"cudaErrorLaunchPendingCountExceeded", {"hipErrorLaunchPendingCountExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // no analogue - {"cudaErrorInvalidDeviceFunction", {"hipErrorInvalidDeviceFunction", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 98 - // CUDA_ERROR_NO_DEVICE - {"cudaErrorNoDevice", {"hipErrorNoDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 100 - // CUDA_ERROR_INVALID_DEVICE - {"cudaErrorInvalidDevice", {"hipErrorInvalidDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 101 - // no analogue - {"cudaErrorStartupFailure", {"hipErrorStartupFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 127 - // CUDA_ERROR_INVALID_IMAGE - {"cudaErrorInvalidKernelImage", {"hipErrorInvalidImage", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 200 - // CUDA_ERROR_INVALID_CONTEXT - {"cudaErrorDeviceUninitialized", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 201 - // Typo fixed in 10.2 - // CUDA_ERROR_INVALID_CONTEXT - {"cudaErrorDeviceUninitilialized", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 201 - // CUDA_ERROR_MAP_FAILED - {"cudaErrorMapBufferObjectFailed", {"hipErrorMapFailed", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 205 - // CUDA_ERROR_UNMAP_FAILED - {"cudaErrorUnmapBufferObjectFailed", {"hipErrorUnmapFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 206 - // CUDA_ERROR_ARRAY_IS_MAPPED - {"cudaErrorArrayIsMapped", {"hipErrorArrayIsMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 207 - // CUDA_ERROR_ALREADY_MAPPED - {"cudaErrorAlreadyMapped", {"hipErrorAlreadyMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 208 - // CUDA_ERROR_NO_BINARY_FOR_GPU - {"cudaErrorNoKernelImageForDevice", {"hipErrorNoBinaryForGpu", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 209 - // CUDA_ERROR_ALREADY_ACQUIRED - {"cudaErrorAlreadyAcquired", {"hipErrorAlreadyAcquired", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 210 - // CUDA_ERROR_NOT_MAPPED - {"cudaErrorNotMapped", {"hipErrorNotMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 211 - // CUDA_ERROR_NOT_MAPPED_AS_ARRAY - {"cudaErrorNotMappedAsArray", {"hipErrorNotMappedAsArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 212 - // CUDA_ERROR_NOT_MAPPED_AS_POINTER - {"cudaErrorNotMappedAsPointer", {"hipErrorNotMappedAsPointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 213 - // CUDA_ERROR_ECC_UNCORRECTABLE - {"cudaErrorECCUncorrectable", {"hipErrorECCNotCorrectable", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 214 - // CUDA_ERROR_UNSUPPORTED_LIMIT - {"cudaErrorUnsupportedLimit", {"hipErrorUnsupportedLimit", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 215 - // CUDA_ERROR_CONTEXT_ALREADY_IN_USE - {"cudaErrorDeviceAlreadyInUse", {"hipErrorContextAlreadyInUse", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 216 - // CUDA_ERROR_PEER_ACCESS_UNSUPPORTED - {"cudaErrorPeerAccessUnsupported", {"hipErrorPeerAccessUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 217 - // CUDA_ERROR_INVALID_PTX - {"cudaErrorInvalidPtx", {"hipErrorInvalidKernelFile", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 218 - // CUDA_ERROR_INVALID_GRAPHICS_CONTEXT - {"cudaErrorInvalidGraphicsContext", {"hipErrorInvalidGraphicsContext", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 219 - // CUDA_ERROR_NVLINK_UNCORRECTABLE - {"cudaErrorNvlinkUncorrectable", {"hipErrorNvlinkUncorrectable", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 220 - // CUDA_ERROR_JIT_COMPILER_NOT_FOUND - {"cudaErrorJitCompilerNotFound", {"hipErrorJitCompilerNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 221 - // CUDA_ERROR_INVALID_SOURCE - {"cudaErrorInvalidSource", {"hipErrorInvalidSource", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 300 - // CUDA_ERROR_FILE_NOT_FOUND - {"cudaErrorFileNotFound", {"hipErrorFileNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 301 - // CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND - {"cudaErrorSharedObjectSymbolNotFound", {"hipErrorSharedObjectSymbolNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 302 - // CUDA_ERROR_SHARED_OBJECT_INIT_FAILED - {"cudaErrorSharedObjectInitFailed", {"hipErrorSharedObjectInitFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 303 - // CUDA_ERROR_OPERATING_SYSTEM - {"cudaErrorOperatingSystem", {"hipErrorOperatingSystem", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 304 - // CUDA_ERROR_INVALID_HANDLE - {"cudaErrorInvalidResourceHandle", {"hipErrorInvalidHandle", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 400 - // CUDA_ERROR_ILLEGAL_STATE - {"cudaErrorIllegalState", {"hipErrorIllegalState", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 401 - // CUDA_ERROR_NOT_FOUND - {"cudaErrorSymbolNotFound", {"hipErrorNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 500 - // CUDA_ERROR_NOT_READY - {"cudaErrorNotReady", {"hipErrorNotReady", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 600 - // CUDA_ERROR_ILLEGAL_ADDRESS - {"cudaErrorIllegalAddress", {"hipErrorIllegalAddress", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 700 - // CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES - {"cudaErrorLaunchOutOfResources", {"hipErrorLaunchOutOfResources", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 701 - // CUDA_ERROR_LAUNCH_TIMEOUT - 
{"cudaErrorLaunchTimeout", {"hipErrorLaunchTimeOut", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 702 - // CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING - {"cudaErrorLaunchIncompatibleTexturing", {"hipErrorLaunchIncompatibleTexturing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 703 - // CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED - {"cudaErrorPeerAccessAlreadyEnabled", {"hipErrorPeerAccessAlreadyEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 704 - // CUDA_ERROR_PEER_ACCESS_NOT_ENABLED - {"cudaErrorPeerAccessNotEnabled", {"hipErrorPeerAccessNotEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 705 - // CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE - {"cudaErrorSetOnActiveProcess", {"hipErrorSetOnActiveProcess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 708 - // CUDA_ERROR_CONTEXT_IS_DESTROYED - {"cudaErrorContextIsDestroyed", {"hipErrorContextIsDestroyed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 709 - // CUDA_ERROR_ASSERT - {"cudaErrorAssert", {"hipErrorAssert", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 710 - // CUDA_ERROR_TOO_MANY_PEERS - {"cudaErrorTooManyPeers", {"hipErrorTooManyPeers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 711 - // CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED - {"cudaErrorHostMemoryAlreadyRegistered", {"hipErrorHostMemoryAlreadyRegistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 712 - // CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED - {"cudaErrorHostMemoryNotRegistered", {"hipErrorHostMemoryNotRegistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 713 - // CUDA_ERROR_HARDWARE_STACK_ERROR - {"cudaErrorHardwareStackError", {"hipErrorHardwareStackError", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 714 - // CUDA_ERROR_ILLEGAL_INSTRUCTION - {"cudaErrorIllegalInstruction", {"hipErrorIllegalInstruction", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 715 - // CUDA_ERROR_MISALIGNED_ADDRESS - {"cudaErrorMisalignedAddress", {"hipErrorMisalignedAddress", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 716 - // CUDA_ERROR_INVALID_ADDRESS_SPACE - {"cudaErrorInvalidAddressSpace", {"hipErrorInvalidAddressSpace", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 717 - // CUDA_ERROR_INVALID_PC - {"cudaErrorInvalidPc", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 718 - // CUDA_ERROR_LAUNCH_FAILED - {"cudaErrorLaunchFailure", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 719 - // CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE - {"cudaErrorCooperativeLaunchTooLarge", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 720 - // CUDA_ERROR_NOT_PERMITTED - {"cudaErrorNotPermitted", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 800 - // CUDA_ERROR_NOT_SUPPORTED - {"cudaErrorNotSupported", {"hipErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 801 - // CUDA_ERROR_SYSTEM_NOT_READY - {"cudaErrorSystemNotReady", {"hipErrorSystemNotReady", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 802 - // CUDA_ERROR_SYSTEM_DRIVER_MISMATCH - {"cudaErrorSystemDriverMismatch", {"hipErrorSystemDriverMismatch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 803 - // CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE - {"cudaErrorCompatNotSupportedOnDevice", {"hipErrorCompatNotSupportedOnDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 804 - // CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED - {"cudaErrorStreamCaptureUnsupported", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 900 - // CUDA_ERROR_STREAM_CAPTURE_INVALIDATED - {"cudaErrorStreamCaptureInvalidated", {"hipErrorStreamCaptureInvalidated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 901 - // CUDA_ERROR_STREAM_CAPTURE_MERGE - {"cudaErrorStreamCaptureMerge", {"hipErrorStreamCaptureMerge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 902 - // CUDA_ERROR_STREAM_CAPTURE_UNMATCHED - {"cudaErrorStreamCaptureUnmatched", {"hipErrorStreamCaptureUnmatched", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 903 - // CUDA_ERROR_STREAM_CAPTURE_UNJOINED - {"cudaErrorStreamCaptureUnjoined", {"hipErrorStreamCaptureUnjoined", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 904 - // CUDA_ERROR_STREAM_CAPTURE_ISOLATION - {"cudaErrorStreamCaptureIsolation", {"hipErrorStreamCaptureIsolation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 905 - // CUDA_ERROR_STREAM_CAPTURE_IMPLICIT - {"cudaErrorStreamCaptureImplicit", {"hipErrorStreamCaptureImplicit", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 906 - // CUDA_ERROR_CAPTURED_EVENT - {"cudaErrorCapturedEvent", {"hipErrorCapturedEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 907 - // CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD - {"cudaErrorStreamCaptureWrongThread", {"hipErrorStreamCaptureWrongThread", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 908 - // CUDA_ERROR_TIMEOUT - {"cudaErrorTimeout", {"hipErrorTimeout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 909 - // CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE - {"cudaErrorGraphExecUpdateFailure", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 910 - // CUDA_ERROR_UNKNOWN - {"cudaErrorUnknown", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 999 - // Deprecated since CUDA 4.1 - {"cudaErrorApiFailureBase", {"hipErrorApiFailureBase", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10000 - - // CUexternalMemoryHandleType - {"cudaExternalMemoryHandleType", {"hipExternalMemoryHandleType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaExternalMemoryHandleType enum values - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD - {"cudaExternalMemoryHandleTypeOpaqueFd", {"hipExternalMemoryHandleTypeOpaqueFD", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 - {"cudaExternalMemoryHandleTypeOpaqueWin32", {"hipExternalMemoryHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT - {"cudaExternalMemoryHandleTypeOpaqueWin32Kmt", {"hipExternalMemoryHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP - {"cudaExternalMemoryHandleTypeD3D12Heap", {"hipExternalMemoryHandleTypeD3D12Heap", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE - {"cudaExternalMemoryHandleTypeD3D12Resource", {"hipExternalMemoryHandleTypeD3D12Resource", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE - {"cudaExternalMemoryHandleTypeD3D11Resource", {"hipExternalMemoryHandleTypeD3D11Resource", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT - {"cudaExternalMemoryHandleTypeD3D11ResourceKmt", {"hipExternalMemoryHandleTypeD3D11ResourceKmt", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF - {"cudaExternalMemoryHandleTypeNvSciBuf", {"hipExternalMemoryHandleTypeNvSciBuf", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUexternalSemaphoreHandleType - {"cudaExternalSemaphoreHandleType", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaExternalSemaphoreHandleType enum values - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD - {"cudaExternalSemaphoreHandleTypeOpaqueFd", {"hipExternalSemaphoreHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 - {"cudaExternalSemaphoreHandleTypeOpaqueWin32", 
{"hipExternalSemaphoreHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT - {"cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt", {"hipExternalSemaphoreHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE - {"cudaExternalSemaphoreHandleTypeD3D12Fence", {"hipExternalSemaphoreHandleTypeD3D12Fence", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE - {"cudaExternalSemaphoreHandleTypeD3D11Fence", {"hipExternalSemaphoreHandleTypeD3D11Fence", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC - {"cudaExternalSemaphoreHandleTypeNvSciSync", {"hipExternalSemaphoreHandleTypeNvSciSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX - {"cudaExternalSemaphoreHandleTypeKeyedMutex", {"hipExternalSemaphoreHandleTypeKeyedMutex", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT - {"cudaExternalSemaphoreHandleTypeKeyedMutexKmt", {"hipExternalSemaphoreHandleTypeKeyedMutexKmt", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUfunction_attribute - // NOTE: only last, starting from 8, values are presented and are equal to Driver's ones - {"cudaFuncAttribute", {"hipFuncAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaFuncAttribute enum values - // CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES - {"cudaFuncAttributeMaxDynamicSharedMemorySize", {"hipFuncAttributeMaxDynamicSharedMemorySize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - // CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT - {"cudaFuncAttributePreferredSharedMemoryCarveout", {"hipFuncAttributePreferredSharedMemoryCarveout", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 - // CU_FUNC_ATTRIBUTE_MAX - {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 - - // CUfunc_cache - {"cudaFuncCache", {"hipFuncCache_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaFuncCache enum values - // CU_FUNC_CACHE_PREFER_NONE = 0x00 - {"cudaFuncCachePreferNone", {"hipFuncCachePreferNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_FUNC_CACHE_PREFER_SHARED = 0x01 - {"cudaFuncCachePreferShared", {"hipFuncCachePreferShared", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_FUNC_CACHE_PREFER_L1 = 0x02 - {"cudaFuncCachePreferL1", {"hipFuncCachePreferL1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_FUNC_CACHE_PREFER_EQUAL = 0x03 - {"cudaFuncCachePreferEqual", {"hipFuncCachePreferEqual", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUarray_cubemap_face - {"cudaGraphicsCubeFace", {"hipGraphicsCubeFace", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsCubeFace enum values - // CU_CUBEMAP_FACE_POSITIVE_X - {"cudaGraphicsCubeFacePositiveX", {"hipGraphicsCubeFacePositiveX", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_CUBEMAP_FACE_NEGATIVE_X - {"cudaGraphicsCubeFaceNegativeX", {"hipGraphicsCubeFaceNegativeX", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_CUBEMAP_FACE_POSITIVE_Y - {"cudaGraphicsCubeFacePositiveY", {"hipGraphicsCubeFacePositiveY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CU_CUBEMAP_FACE_NEGATIVE_Y - {"cudaGraphicsCubeFaceNegativeY", {"hipGraphicsCubeFaceNegativeY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_CUBEMAP_FACE_POSITIVE_Z - {"cudaGraphicsCubeFacePositiveZ", {"hipGraphicsCubeFacePositiveZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_CUBEMAP_FACE_NEGATIVE_Z - {"cudaGraphicsCubeFaceNegativeZ", {"hipGraphicsCubeFaceNegativeZ", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - - // CUgraphicsMapResourceFlags - {"cudaGraphicsMapFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsMapFlags enum values - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGraphicsMapFlagsNone", {"hipGraphicsMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGraphicsMapFlagsReadOnly", {"hipGraphicsMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGraphicsMapFlagsWriteDiscard", {"hipGraphicsMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUgraphicsRegisterFlags - {"cudaGraphicsRegisterFlags", {"hipGraphicsRegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsRegisterFlags enum values - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGraphicsRegisterFlagsNone", {"hipGraphicsRegisterFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGraphicsRegisterFlagsReadOnly", {"hipGraphicsRegisterFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGraphicsRegisterFlagsWriteDiscard", {"hipGraphicsRegisterFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04 - {"cudaGraphicsRegisterFlagsSurfaceLoadStore", {"hipGraphicsRegisterFlagsSurfaceLoadStore", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08 - {"cudaGraphicsRegisterFlagsTextureGather", {"hipGraphicsRegisterFlagsTextureGather", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUgraphNodeType - {"cudaGraphNodeType", 
{"hipGraphNodeType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphNodeType enum values - // CU_GRAPH_NODE_TYPE_KERNEL = 0 - {"cudaGraphNodeTypeKernel", {"hipGraphNodeTypeKernel", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_GRAPH_NODE_TYPE_MEMCPY = 1 - {"cudaGraphNodeTypeMemcpy", {"hipGraphNodeTypeMemcpy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_GRAPH_NODE_TYPE_MEMSET = 2 - {"cudaGraphNodeTypeMemset", {"hipGraphNodeTypeMemset", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CU_GRAPH_NODE_TYPE_HOST = 3 - {"cudaGraphNodeTypeHost", {"hipGraphNodeTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_GRAPH_NODE_TYPE_GRAPH = 4 - {"cudaGraphNodeTypeGraph", {"hipGraphNodeTypeGraph", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_GRAPH_NODE_TYPE_EMPTY = 5 - {"cudaGraphNodeTypeEmpty", {"hipGraphNodeTypeEmpty", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - // CU_GRAPH_NODE_TYPE_COUNT - {"cudaGraphNodeTypeCount", {"hipGraphNodeTypeCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUgraphExecUpdateResult - {"cudaGraphExecUpdateResult", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphExecUpdateResult enum values - // CU_GRAPH_EXEC_UPDATE_SUCCESS - {"cudaGraphExecUpdateSuccess", {"hipGraphExecUpdateSuccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x0 - // CU_GRAPH_EXEC_UPDATE_ERROR - {"cudaGraphExecUpdateError", {"hipGraphExecUpdateError", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED - {"cudaGraphExecUpdateErrorTopologyChanged", {"hipGraphExecUpdateErrorTopologyChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x2 - // CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED - {"cudaGraphExecUpdateErrorNodeTypeChanged", 
{"hipGraphExecUpdateErrorNodeTypeChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x3 - // CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED - {"cudaGraphExecUpdateErrorFunctionChanged", {"hipGraphExecUpdateErrorFunctionChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x4 - // CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED - {"cudaGraphExecUpdateErrorParametersChanged", {"hipGraphExecUpdateErrorParametersChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x5 - // CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED - {"cudaGraphExecUpdateErrorNotSupported", {"hipGraphExecUpdateErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x6 - - // CUlimit - {"cudaLimit", {"hipLimit_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaLimit enum values - // CU_LIMIT_STACK_SIZE - {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_LIMIT_PRINTF_FIFO_SIZE - {"cudaLimitPrintfFifoSize", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_LIMIT_MALLOC_HEAP_SIZE - {"cudaLimitMallocHeapSize", {"hipLimitMallocHeapSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH - {"cudaLimitDevRuntimeSyncDepth", {"hipLimitDevRuntimeSyncDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT - {"cudaLimitDevRuntimePendingLaunchCount", {"hipLimitDevRuntimePendingLaunchCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_LIMIT_MAX_L2_FETCH_GRANULARITY - {"cudaLimitMaxL2FetchGranularity", {"hipLimitMaxL2FetchGranularity", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - - // no analogue - {"cudaMemcpyKind", {"hipMemcpyKind", "", CONV_TYPE, API_RUNTIME}}, - // cudaMemcpyKind enum values - {"cudaMemcpyHostToHost", {"hipMemcpyHostToHost", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 0 - {"cudaMemcpyHostToDevice", {"hipMemcpyHostToDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaMemcpyDeviceToHost", {"hipMemcpyDeviceToHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaMemcpyDeviceToDevice", {"hipMemcpyDeviceToDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - {"cudaMemcpyDefault", {"hipMemcpyDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - - // CUmem_advise - {"cudaMemoryAdvise", {"hipMemAdvise", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemoryAdvise enum values - // CU_MEM_ADVISE_SET_READ_MOSTLY - {"cudaMemAdviseSetReadMostly", {"hipMemAdviseSetReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_MEM_ADVISE_UNSET_READ_MOSTLY - {"cudaMemAdviseUnsetReadMostly", {"hipMemAdviseUnsetReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_MEM_ADVISE_SET_PREFERRED_LOCATION - {"cudaMemAdviseSetPreferredLocation", {"hipMemAdviseSetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION - {"cudaMemAdviseUnsetPreferredLocation", {"hipMemAdviseUnsetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_MEM_ADVISE_SET_ACCESSED_BY - {"cudaMemAdviseSetAccessedBy", {"hipMemAdviseSetAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_MEM_ADVISE_UNSET_ACCESSED_BY - {"cudaMemAdviseUnsetAccessedBy", {"hipMemAdviseUnsetAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - - // no analogue - // NOTE: CUmemorytype is partial analogue - {"cudaMemoryType", {"hipMemoryType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemoryType enum values - {"cudaMemoryTypeUnregistered", {"hipMemoryTypeUnregistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaMemoryTypeHost", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, 
// 1 - {"cudaMemoryTypeDevice", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUmem_range_attribute - {"cudaMemRangeAttribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemRangeAttribute enum values - // CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY - {"cudaMemRangeAttributeReadMostly", {"hipMemRangeAttributeReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION - {"cudaMemRangeAttributePreferredLocation", {"hipMemRangeAttributePreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY - {"cudaMemRangeAttributeAccessedBy", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION - {"cudaMemRangeAttributeLastPrefetchLocation", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - - // no analogue - {"cudaOutputMode", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaOutputMode_t", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaOutputMode enum values - {"cudaKeyValuePair", {"hipKeyValuePair", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - {"cudaCSV", {"hipCSV", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - - // CUresourcetype - {"cudaResourceType", {"hipResourceType", "", CONV_TYPE, API_RUNTIME}}, - // cudaResourceType enum values - // CU_RESOURCE_TYPE_ARRAY - {"cudaResourceTypeArray", {"hipResourceTypeArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x00 - // CU_RESOURCE_TYPE_MIPMAPPED_ARRAY - {"cudaResourceTypeMipmappedArray", {"hipResourceTypeMipmappedArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 
0x01 - // CU_RESOURCE_TYPE_LINEAR - {"cudaResourceTypeLinear", {"hipResourceTypeLinear", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_RESOURCE_TYPE_PITCH2D - {"cudaResourceTypePitch2D", {"hipResourceTypePitch2D", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x03 - - // CUresourceViewFormat - {"cudaResourceViewFormat", {"hipResourceViewFormat", "", CONV_TYPE, API_RUNTIME}}, - // enum cudaResourceViewFormat - // CU_RES_VIEW_FORMAT_NONE - {"cudaResViewFormatNone", {"hipResViewFormatNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x00 - // CU_RES_VIEW_FORMAT_UINT_1X8 - {"cudaResViewFormatUnsignedChar1", {"hipResViewFormatUnsignedChar1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x01 - // CU_RES_VIEW_FORMAT_UINT_2X8 - {"cudaResViewFormatUnsignedChar2", {"hipResViewFormatUnsignedChar2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_RES_VIEW_FORMAT_UINT_4X8 - {"cudaResViewFormatUnsignedChar4", {"hipResViewFormatUnsignedChar4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x03 - // CU_RES_VIEW_FORMAT_SINT_1X8 - {"cudaResViewFormatSignedChar1", {"hipResViewFormatSignedChar1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x04 - // CU_RES_VIEW_FORMAT_SINT_2X8 - {"cudaResViewFormatSignedChar2", {"hipResViewFormatSignedChar2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x05 - // CU_RES_VIEW_FORMAT_SINT_4X8 - {"cudaResViewFormatSignedChar4", {"hipResViewFormatSignedChar4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x06 - // CU_RES_VIEW_FORMAT_UINT_1X16 - {"cudaResViewFormatUnsignedShort1", {"hipResViewFormatUnsignedShort1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x07 - // CU_RES_VIEW_FORMAT_UINT_2X16 - {"cudaResViewFormatUnsignedShort2", {"hipResViewFormatUnsignedShort2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x08 - // CU_RES_VIEW_FORMAT_UINT_4X16 - {"cudaResViewFormatUnsignedShort4", {"hipResViewFormatUnsignedShort4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x09 - // CU_RES_VIEW_FORMAT_SINT_1X16 - {"cudaResViewFormatSignedShort1", 
{"hipResViewFormatSignedShort1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0a - // CU_RES_VIEW_FORMAT_SINT_2X16 - {"cudaResViewFormatSignedShort2", {"hipResViewFormatSignedShort2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0b - // CU_RES_VIEW_FORMAT_SINT_4X16 - {"cudaResViewFormatSignedShort4", {"hipResViewFormatSignedShort4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0c - // CU_RES_VIEW_FORMAT_UINT_1X32 - {"cudaResViewFormatUnsignedInt1", {"hipResViewFormatUnsignedInt1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0d - // CU_RES_VIEW_FORMAT_UINT_2X32 - {"cudaResViewFormatUnsignedInt2", {"hipResViewFormatUnsignedInt2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0e - // CU_RES_VIEW_FORMAT_UINT_4X32 - {"cudaResViewFormatUnsignedInt4", {"hipResViewFormatUnsignedInt4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0f - // CU_RES_VIEW_FORMAT_SINT_1X32 - {"cudaResViewFormatSignedInt1", {"hipResViewFormatSignedInt1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x10 - // CU_RES_VIEW_FORMAT_SINT_2X32 - {"cudaResViewFormatSignedInt2", {"hipResViewFormatSignedInt2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x11 - // CU_RES_VIEW_FORMAT_SINT_4X32 - {"cudaResViewFormatSignedInt4", {"hipResViewFormatSignedInt4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x12 - // CU_RES_VIEW_FORMAT_FLOAT_1X16 - {"cudaResViewFormatHalf1", {"hipResViewFormatHalf1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x13 - // CU_RES_VIEW_FORMAT_FLOAT_2X16 - {"cudaResViewFormatHalf2", {"hipResViewFormatHalf2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x14 - // CU_RES_VIEW_FORMAT_FLOAT_4X16 - {"cudaResViewFormatHalf4", {"hipResViewFormatHalf4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x15 - // CU_RES_VIEW_FORMAT_FLOAT_1X32 - {"cudaResViewFormatFloat1", {"hipResViewFormatFloat1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x16 - // CU_RES_VIEW_FORMAT_FLOAT_2X32 - {"cudaResViewFormatFloat2", {"hipResViewFormatFloat2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x17 - // 
CU_RES_VIEW_FORMAT_FLOAT_4X32 - {"cudaResViewFormatFloat4", {"hipResViewFormatFloat4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x18 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC1 - {"cudaResViewFormatUnsignedBlockCompressed1", {"hipResViewFormatUnsignedBlockCompressed1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x19 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC2 - {"cudaResViewFormatUnsignedBlockCompressed2", {"hipResViewFormatUnsignedBlockCompressed2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1a - // CU_RES_VIEW_FORMAT_UNSIGNED_BC3 - {"cudaResViewFormatUnsignedBlockCompressed3", {"hipResViewFormatUnsignedBlockCompressed3", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1b - // CU_RES_VIEW_FORMAT_UNSIGNED_BC4 - {"cudaResViewFormatUnsignedBlockCompressed4", {"hipResViewFormatUnsignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1c - // CU_RES_VIEW_FORMAT_SIGNED_BC4 - {"cudaResViewFormatSignedBlockCompressed4", {"hipResViewFormatSignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1d - // CU_RES_VIEW_FORMAT_UNSIGNED_BC5 - {"cudaResViewFormatUnsignedBlockCompressed5", {"hipResViewFormatUnsignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1e - // CU_RES_VIEW_FORMAT_SIGNED_BC5 - {"cudaResViewFormatSignedBlockCompressed5", {"hipResViewFormatSignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1f - // CU_RES_VIEW_FORMAT_UNSIGNED_BC6H - {"cudaResViewFormatUnsignedBlockCompressed6H", {"hipResViewFormatUnsignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x20 - // CU_RES_VIEW_FORMAT_SIGNED_BC6H - {"cudaResViewFormatSignedBlockCompressed6H", {"hipResViewFormatSignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x21 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC7 - {"cudaResViewFormatUnsignedBlockCompressed7", {"hipResViewFormatUnsignedBlockCompressed7", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x22 - - // CUshared_carveout - {"cudaSharedCarveout", {"hipSharedCarveout", "", 
CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaSharedCarveout enum values - // CU_SHAREDMEM_CARVEOUT_DEFAULT - {"cudaSharedmemCarveoutDefault", {"hipSharedmemCarveoutDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // -1 - // CU_SHAREDMEM_CARVEOUT_MAX_SHARED - {"cudaSharedmemCarveoutMaxShared", {"hipSharedmemCarveoutMaxShared", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 100 - // CU_SHAREDMEM_CARVEOUT_MAX_L1 - {"cudaSharedmemCarveoutMaxL1", {"hipSharedmemCarveoutMaxL1", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - - // CUsharedconfig - {"cudaSharedMemConfig", {"hipSharedMemConfig", "", CONV_TYPE, API_RUNTIME}}, - // cudaSharedMemConfig enum values - // CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00 - {"cudaSharedMemBankSizeDefault", {"hipSharedMemBankSizeDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01 - {"cudaSharedMemBankSizeFourByte", {"hipSharedMemBankSizeFourByte", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 - {"cudaSharedMemBankSizeEightByte", {"hipSharedMemBankSizeEightByte", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - - // CUstreamCaptureStatus - {"cudaStreamCaptureStatus", {"hipStreamCaptureStatus", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaStreamCaptureStatus enum values - // CU_STREAM_CAPTURE_STATUS_NONE - {"cudaStreamCaptureStatusNone", {"hipStreamCaptureStatusNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_STREAM_CAPTURE_STATUS_ACTIVE - {"cudaStreamCaptureStatusActive", {"hipStreamCaptureStatusActive", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_STREAM_CAPTURE_STATUS_INVALIDATED - {"cudaStreamCaptureStatusInvalidated", {"hipStreamCaptureStatusInvalidated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUstreamCaptureMode - {"cudaStreamCaptureMode", {"hipStreamCaptureMode", "", 
CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaStreamCaptureMode enum values - // CU_STREAM_CAPTURE_MODE_GLOBAL - {"cudaStreamCaptureModeGlobal", {"hipStreamCaptureModeGlobal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_STREAM_CAPTURE_MODE_THREAD_LOCAL - {"cudaStreamCaptureModeThreadLocal", {"hipStreamCaptureModeThreadLocal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_STREAM_CAPTURE_MODE_RELAXED - {"cudaStreamCaptureModeRelaxed", {"hipStreamCaptureModeRelaxed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"cudaSurfaceBoundaryMode", {"hipSurfaceBoundaryMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaSurfaceBoundaryMode enum values - {"cudaBoundaryModeZero", {"hipBoundaryModeZero", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaBoundaryModeClamp", {"hipBoundaryModeClamp", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaBoundaryModeTrap", {"hipBoundaryModeTrap", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - - // no analogue - {"cudaSurfaceFormatMode", {"hipSurfaceFormatMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // enum cudaSurfaceFormatMode - {"cudaFormatModeForced", {"hipFormatModeForced", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaFormatModeAuto", {"hipFormatModeAuto", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // no analogue - {"cudaTextureAddressMode", {"hipTextureAddressMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureAddressMode enum values - {"cudaAddressModeWrap", {"hipAddressModeWrap", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaAddressModeClamp", {"hipAddressModeClamp", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaAddressModeMirror", {"hipAddressModeMirror", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaAddressModeBorder", {"hipAddressModeBorder", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUfilter_mode - {"cudaTextureFilterMode", 
{"hipTextureFilterMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureFilterMode enum values - // CU_TR_FILTER_MODE_POINT - {"cudaFilterModePoint", {"hipFilterModePoint", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_TR_FILTER_MODE_LINEAR - {"cudaFilterModeLinear", {"hipFilterModeLinear", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - - // no analogue - {"cudaTextureReadMode", {"hipTextureReadMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureReadMode enum values - {"cudaReadModeElementType", {"hipReadModeElementType", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaReadModeNormalizedFloat", {"hipReadModeNormalizedFloat", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - - // CUGLDeviceList - {"cudaGLDeviceList", {"hipGLDeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGLDeviceList enum values - // CU_GL_DEVICE_LIST_ALL = 0x01 - {"cudaGLDeviceListAll", {"hipGLDeviceListAll", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaGLDeviceListCurrentFrame", {"hipGLDeviceListCurrentFrame", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaGLDeviceListNextFrame", {"hipGLDeviceListNextFrame", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUGLmap_flags - {"cudaGLMapFlags", {"hipGLMapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGLMapFlags enum values - // CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGLMapFlagsNone", {"hipGLMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGLMapFlagsReadOnly", {"hipGLMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGLMapFlagsWriteDiscard", {"hipGLMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d9DeviceList - 
{"cudaD3D9DeviceList", {"hipD3D9DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUd3d9DeviceList enum values - // CU_D3D9_DEVICE_LIST_ALL = 0x01 - {"cudaD3D9DeviceListAll", {"HIP_D3D9_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D9_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D9DeviceListCurrentFrame", {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D9_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D9DeviceListNextFrame", {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUd3d9map_flags - {"cudaD3D9MapFlags", {"hipD3D9MapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9MapFlags enum values - // CU_D3D9_MAPRESOURCE_FLAGS_NONE = 0x00 - {"cudaD3D9MapFlagsNone", {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D9_MAPRESOURCE_FLAGS_READONLY = 0x01 - {"cudaD3D9MapFlagsReadOnly", {"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD = 0x02 - {"cudaD3D9MapFlagsWriteDiscard", {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d9Register_flags - {"cudaD3D9RegisterFlags", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9RegisterFlags enum values - // CU_D3D9_REGISTER_FLAGS_NONE = 0x00 - {"cudaD3D9RegisterFlagsNone", {"HIP_D3D9_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D9_REGISTER_FLAGS_ARRAY = 0x01 - {"cudaD3D9RegisterFlagsArray", {"HIP_D3D9_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUd3d10DeviceList - {"cudaD3D10DeviceList", {"hipd3d10DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10DeviceList enum values - 
// CU_D3D10_DEVICE_LIST_ALL = 0x01 - {"cudaD3D10DeviceListAll", {"HIP_D3D10_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D10_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D10DeviceListCurrentFrame", {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D10_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D10DeviceListNextFrame", {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUd3d10map_flags - {"cudaD3D10MapFlags", {"hipD3D10MapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10MapFlags enum values - // CU_D3D10_MAPRESOURCE_FLAGS_NONE = 0x00 - {"cudaD3D10MapFlagsNone", {"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D10_MAPRESOURCE_FLAGS_READONLY = 0x01 - {"cudaD3D10MapFlagsReadOnly", {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD = 0x02 - {"cudaD3D10MapFlagsWriteDiscard", {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d10Register_flags - {"cudaD3D10RegisterFlags", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10RegisterFlags enum values - // CU_D3D10_REGISTER_FLAGS_NONE = 0x00 - {"cudaD3D10RegisterFlagsNone", {"HIP_D3D10_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D10_REGISTER_FLAGS_ARRAY = 0x01 - {"cudaD3D10RegisterFlagsArray", {"HIP_D3D10_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUd3d11DeviceList - {"cudaD3D11DeviceList", {"hipd3d11DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D11DeviceList enum values - // CU_D3D11_DEVICE_LIST_ALL = 0x01 - {"cudaD3D11DeviceListAll", {"HIP_D3D11_DEVICE_LIST_ALL", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D11_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D11DeviceListCurrentFrame", {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D11_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D11DeviceListNextFrame", {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // no analogue - {"libraryPropertyType", {"hipLibraryPropertyType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"libraryPropertyType_t", {"hipLibraryPropertyType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"MAJOR_VERSION", {"hipLibraryMajorVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"MINOR_VERSION", {"hipLibraryMinorVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"PATCH_LEVEL", {"hipLibraryPatchVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 4. Typedefs - - // CUhostFn - {"cudaHostFn_t", {"hipHostFn", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUstreamCallback - {"cudaStreamCallback_t", {"hipStreamCallback_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUsurfObject - {"cudaSurfaceObject_t", {"hipSurfaceObject_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUtexObject - {"cudaTextureObject_t", {"hipTextureObject_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUuuid - {"cudaUUID_t", {"hipUUID_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5. 
Defines - - // no analogue - {"CUDA_EGL_MAX_PLANES", {"HIP_EGL_MAX_PLANES", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_IPC_HANDLE_SIZE - {"CUDA_IPC_HANDLE_SIZE", {"HIP_IPC_HANDLE_SIZE", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // no analogue - {"cudaArrayDefault", {"hipArrayDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CUDA_ARRAY3D_LAYERED - {"cudaArrayLayered", {"hipArrayLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CUDA_ARRAY3D_SURFACE_LDST - {"cudaArraySurfaceLoadStore", {"hipArraySurfaceLoadStore", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CUDA_ARRAY3D_CUBEMAP - {"cudaArrayCubemap", {"hipArrayCubemap", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CUDA_ARRAY3D_TEXTURE_GATHER - {"cudaArrayTextureGather", {"hipArrayTextureGather", "", CONV_DEFINE, API_RUNTIME}}, // 0x08 - // CUDA_ARRAY3D_COLOR_ATTACHMENT - {"cudaArrayColorAttachment", {"hipArrayColorAttachment", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x20 - // CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC - {"cudaCooperativeLaunchMultiDeviceNoPreSync", {"hipCooperativeLaunchMultiDeviceNoPreSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC - {"cudaCooperativeLaunchMultiDeviceNoPostSync", {"hipCooperativeLaunchMultiDeviceNoPostSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_DEVICE_CPU ((CUdevice)-1) - {"cudaCpuDeviceId", {"hipCpuDeviceId", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((int)-1) - // CU_DEVICE_INVALID ((CUdevice)-2) - {"cudaInvalidDeviceId", {"hipInvalidDeviceId", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((int)-2) - // CU_CTX_BLOCKING_SYNC - // NOTE: Deprecated since CUDA 4.0 and replaced with cudaDeviceScheduleBlockingSync - {"cudaDeviceBlockingSync", {"hipDeviceScheduleBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_CTX_LMEM_RESIZE_TO_MAX - {"cudaDeviceLmemResizeToMax", {"hipDeviceLmemResizeToMax", "", CONV_DEFINE, 
API_RUNTIME}}, // 0x10 - // CU_CTX_MAP_HOST - {"cudaDeviceMapHost", {"hipDeviceMapHost", "", CONV_DEFINE, API_RUNTIME}}, // 0x08 - // CU_CTX_FLAGS_MASK - {"cudaDeviceMask", {"hipDeviceMask", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1f - // no analogue - {"cudaDevicePropDontCare", {"hipDevicePropDontCare", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CU_CTX_SCHED_AUTO - {"cudaDeviceScheduleAuto", {"hipDeviceScheduleAuto", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_CTX_SCHED_SPIN - {"cudaDeviceScheduleSpin", {"hipDeviceScheduleSpin", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_CTX_SCHED_YIELD - {"cudaDeviceScheduleYield", {"hipDeviceScheduleYield", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_CTX_SCHED_BLOCKING_SYNC - {"cudaDeviceScheduleBlockingSync", {"hipDeviceScheduleBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_CTX_SCHED_MASK - {"cudaDeviceScheduleMask", {"hipDeviceScheduleMask", "", CONV_DEFINE, API_RUNTIME}}, // 0x07 - // CU_EVENT_DEFAULT - {"cudaEventDefault", {"hipEventDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_EVENT_BLOCKING_SYNC - {"cudaEventBlockingSync", {"hipEventBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_EVENT_DISABLE_TIMING - {"cudaEventDisableTiming", {"hipEventDisableTiming", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_EVENT_INTERPROCESS - {"cudaEventInterprocess", {"hipEventInterprocess", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CUDA_EXTERNAL_MEMORY_DEDICATED - {"cudaExternalMemoryDedicated", {"hipExternalMemoryDedicated", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC - {"cudaExternalSemaphoreSignalSkipNvSciBufMemSync", {"hipExternalSemaphoreSignalSkipNvSciBufMemSync", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC - {"cudaExternalSemaphoreWaitSkipNvSciBufMemSync", {"hipExternalSemaphoreWaitSkipNvSciBufMemSync", "", 
CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CUDA_NVSCISYNC_ATTR_SIGNAL - {"cudaNvSciSyncAttrSignal", {"hipNvSciSyncAttrSignal", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CUDA_NVSCISYNC_ATTR_WAIT - {"cudaNvSciSyncAttrWait", {"hipNvSciSyncAttrWait", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x2 - // no analogue - {"cudaHostAllocDefault", {"hipHostMallocDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_MEMHOSTALLOC_PORTABLE - {"cudaHostAllocPortable", {"hipHostMallocPortable", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEMHOSTALLOC_DEVICEMAP - {"cudaHostAllocMapped", {"hipHostMallocMapped", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEMHOSTALLOC_WRITECOMBINED - {"cudaHostAllocWriteCombined", {"hipHostMallocWriteCombined", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // no analogue - {"cudaHostRegisterDefault", {"hipHostRegisterDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_MEMHOSTREGISTER_PORTABLE - {"cudaHostRegisterPortable", {"hipHostRegisterPortable", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEMHOSTREGISTER_DEVICEMAP - {"cudaHostRegisterMapped", {"hipHostRegisterMapped", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEMHOSTREGISTER_IOMEMORY - {"cudaHostRegisterIoMemory", {"hipHostRegisterIoMemory", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS - {"cudaIpcMemLazyEnablePeerAccess", {"hipIpcMemLazyEnablePeerAccess", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEM_ATTACH_GLOBAL - {"cudaMemAttachGlobal", {"hipMemAttachGlobal", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEM_ATTACH_HOST - {"cudaMemAttachHost", {"hipMemAttachHost", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEM_ATTACH_SINGLE - {"cudaMemAttachSingle", {"hipMemAttachSingle", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // no analogue - {"cudaTextureType1D", {"hipTextureType1D", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // no analogue - {"cudaTextureType2D", 
{"hipTextureType2D", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // no analogue - {"cudaTextureType3D", {"hipTextureType3D", "", CONV_DEFINE, API_RUNTIME}}, // 0x03 - // no analogue - {"cudaTextureTypeCubemap", {"hipTextureTypeCubemap", "", CONV_DEFINE, API_RUNTIME}}, // 0x0C - // no analogue - {"cudaTextureType1DLayered", {"hipTextureType1DLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xF1 - // no analogue - {"cudaTextureType2DLayered", {"hipTextureType2DLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xF2 - // no analogue - {"cudaTextureTypeCubemapLayered", {"hipTextureTypeCubemapLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xFC - // CU_OCCUPANCY_DEFAULT - {"cudaOccupancyDefault", {"hipOccupancyDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE - {"cudaOccupancyDisableCachingOverride", {"hipOccupancyDisableCachingOverride", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_STREAM_DEFAULT - {"cudaStreamDefault", {"hipStreamDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_STREAM_NON_BLOCKING - {"cudaStreamNonBlocking", {"hipStreamNonBlocking", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_STREAM_LEGACY ((CUstream)0x1) - {"cudaStreamLegacy", {"hipStreamLegacy", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((cudaStream_t)0x1) - // CU_STREAM_PER_THREAD ((CUstream)0x2) - {"cudaStreamPerThread", {"hipStreamPerThread", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((cudaStream_t)0x2) -}; diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp deleted file mode 100644 index 0f3997145e..0000000000 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ /dev/null @@ -1,758 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -const std::map CUDA_SPARSE_FUNCTION_MAP{ - // 5. 
cuSPARSE Helper Function Reference - {"cusparseCreate", {"hipsparseCreate", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateSolveAnalysisInfo", {"hipsparseCreateSolveAnalysisInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateHybMat", {"hipsparseCreateHybMat", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateMatDescr", {"hipsparseCreateMatDescr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroy", {"hipsparseDestroy", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroySolveAnalysisInfo", {"hipsparseDestroySolveAnalysisInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyHybMat", {"hipsparseDestroyHybMat", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyMatDescr", {"hipsparseDestroyMatDescr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetLevelInfo", {"hipsparseGetLevelInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseGetMatDiagType", {"hipsparseGetMatDiagType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatFillMode", {"hipsparseGetMatFillMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatIndexBase", {"hipsparseGetMatIndexBase", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatType", {"hipsparseGetMatType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetPointerMode", {"hipsparseGetPointerMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetVersion", {"hipsparseGetVersion", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatDiagType", {"hipsparseSetMatDiagType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatFillMode", {"hipsparseSetMatFillMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatIndexBase", {"hipsparseSetMatIndexBase", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatType", {"hipsparseSetMatType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetPointerMode", {"hipsparseSetPointerMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetStream", {"hipsparseSetStream", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetStream", {"hipsparseGetStream", "", 
CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrilu02Info", {"hipsparseDestroyCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateBsrsv2Info", {"hipsparseCreateBsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrsv2Info", {"hipsparseDestroyBsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsrsm2Info", {"hipsparseCreateBsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrsm2Info", {"hipsparseDestroyBsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsric02Inf", {"hipsparseCreateBsric02Inf", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsric02Info", {"hipsparseDestroyBsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsrilu02Info", {"hipsparseCreateBsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrilu02Info", {"hipsparseDestroyBsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsrgemm2Info", {"hipsparseCreateCsrgemm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrgemm2Info", {"hipsparseDestroyCsrgemm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreatePruneInfo", {"hipsparseCreatePruneInfo", 
"", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyPruneInfo", {"hipsparseDestroyPruneInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 6. cuSPARSE Level 1 Function Reference - {"cusparseSaxpyi", {"hipsparseSaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDaxpyi", {"hipsparseDaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCaxpyi", {"hipsparseCaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZaxpyi", {"hipsparseZaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSdoti", {"hipsparseSdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDdoti", {"hipsparseDdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCdoti", {"hipsparseCdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZdoti", {"hipsparseZdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCdotci", {"hipsparseCdotci", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZdotci", {"hipsparseZdotci", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSgthr", {"hipsparseSgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDgthr", {"hipsparseDgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCgthr", {"hipsparseCgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZgthr", {"hipsparseZgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSgthrz", {"hipsparseSgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDgthrz", {"hipsparseDgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCgthrz", {"hipsparseCgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZgthrz", {"hipsparseZgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSroti", {"hipsparseSroti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDroti", {"hipsparseDroti", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSsctr", {"hipsparseSsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDsctr", {"hipsparseDsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCsctr", {"hipsparseCsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZsctr", {"hipsparseZsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - - // 7. 
cuSPARSE Level 2 Function Reference - {"cusparseSbsrmv", {"hipsparseSbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrmv", {"hipsparseDbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrmv", {"hipsparseCbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrmv", {"hipsparseZbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrxmv", {"hipsparseSbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrxmv", {"hipsparseDbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrxmv", {"hipsparseCbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrxmv", {"hipsparseZbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrmv", {"hipsparseScsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmv", {"hipsparseDcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmv", {"hipsparseCcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmv", {"hipsparseZcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCsrmvEx", {"hipsparseCsrmvEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrmvEx_bufferSize", {"hipsparseCsrmvEx_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrmv_mp", {"hipsparseScsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrmv_mp", {"hipsparseDcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrmv_mp", {"hipsparseCcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrmv_mp", {"hipsparseZcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemvi", {"hipsparseSgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemvi", {"hipsparseDgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemvi", {"hipsparseCgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgemvi", {"hipsparseZgemvi", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemvi_bufferSize", {"hipsparseSgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemvi_bufferSize", {"hipsparseDgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemvi_bufferSize", {"hipsparseCgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgemvi_bufferSize", {"hipsparseZgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_bufferSize", {"hipsparseSbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrsv2_bufferSizeExt", {"hipsparseSbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_bufferSize", {"hipsparseDbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_bufferSizeExt", {"hipsparseDbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_bufferSize", {"hipsparseCbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_bufferSizeExt", {"hipsparseCbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_bufferSize", {"hipsparseZbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_bufferSizeExt", {"hipsparseZbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_analysis", {"hipsparseSbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_analysis", {"hipsparseDbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_analysis", {"hipsparseCbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_analysis", {"hipsparseZbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_solve", {"hipsparseSbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_solve", {"hipsparseDbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_solve", {"hipsparseCbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_solve", {"hipsparseZbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXbsrsv2_zeroPivot", {"hipsparseXbsrsv2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv_analysis", {"hipsparseScsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsv_analysis", {"hipsparseDcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsv_analysis", {"hipsparseCcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsv_analysis", {"hipsparseZcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseCsrsv_analysisEx", {"hipsparseCsrsv_analysisEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrsv_solveEx", {"hipsparseCsrsv_solveEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv2_bufferSize", {"hipsparseScsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsv2_bufferSizeExt", {"hipsparseScsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_bufferSize", {"hipsparseDcsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_bufferSizeExt", {"hipsparseDcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_bufferSize", {"hipsparseCcsrsv2_bufferSize", "", CONV_LIB_FUNC, 
API_SPARSE}}, - {"cusparseCcsrsv2_bufferSizeExt", {"hipsparseCcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_bufferSize", {"hipsparseZcsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_bufferSizeExt", {"hipsparseZcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsv2_analysis", {"hipsparseScsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_analysis", {"hipsparseDcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_analysis", {"hipsparseCcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_analysis", {"hipsparseZcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsv2_solve", {"hipsparseScsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_solve", {"hipsparseDcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_solve", {"hipsparseCcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_solve", {"hipsparseZcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcsrsv2_zeroPivot", {"hipsparseXcsrsv2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseShybmv", {"hipsparseShybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDhybmv", {"hipsparseDhybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseChybmv", {"hipsparseChybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZhybmv", {"hipsparseZhybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseShybsv_analysis", {"hipsparseShybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhybsv_analysis", {"hipsparseDhybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChybsv_analysis", {"hipsparseChybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhybsv_analysis", {"hipsparseZhybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShybsv_solve", {"hipsparseShybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDhybsv_solve", {"hipsparseDhybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChybsv_solve", {"hipsparseChybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhybsv_solve", {"hipsparseZhybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 8. cuSPARSE Level 3 Function Reference - {"cusparseScsrmm", {"hipsparseScsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmm", {"hipsparseDcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmm", {"hipsparseCcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmm", {"hipsparseZcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrmm2", {"hipsparseScsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmm2", {"hipsparseDcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmm2", {"hipsparseCcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmm2", {"hipsparseZcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsm_analysis", {"hipsparseScsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm_analysis", {"hipsparseDcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm_analysis", {"hipsparseCcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm_analysis", {"hipsparseZcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE,}}, - {"cusparseCcsrsm2_bufferSizeExt", 
{"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsm2_solve", {"hipsparseScsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_solve", {"hipsparseDcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_solve", {"hipsparseCcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_solve", {"hipsparseZcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSbsrmm", {"hipsparseSbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrmm", {"hipsparseDbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrmm", {"hipsparseCbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrmm", {"hipsparseZbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrsm2_bufferSizeExt", {"hipsparseCbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_bufferSize", {"hipsparseDbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_bufferSizeExt", {"hipsparseDbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_bufferSizeExt", {"hipsparseCbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_bufferSize", {"hipsparseZbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_bufferSizeExt", {"hipsparseZbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_analysis", {"hipsparseSbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_analysis", {"hipsparseDbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_analysis", {"hipsparseCbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_analysis", {"hipsparseZbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_solve", {"hipsparseSbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_solve", {"hipsparseDbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_solve", {"hipsparseCbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_solve", {"hipsparseZbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsrsm2_zeroPivot", {"hipsparseXbsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemmi", {"hipsparseSgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemmi", {"hipsparseDgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemmi", {"hipsparseCgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgemmi", {"hipsparseZgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 9. 
cuSPARSE Extra Function Reference - {"cusparseXcsrgeamNnz", {"hipsparseXcsrgeamNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsrgeam", {"hipsparseScsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam", {"hipsparseDcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam", {"hipsparseCcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam", {"hipsparseZcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrgeam2Nnz", {"hipsparseXcsrgeam2Nnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsrgeam2", {"hipsparseScsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam2", {"hipsparseDcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam2", {"hipsparseCcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam2", {"hipsparseZcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam2_bufferSizeExt", {"hipsparseCcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam2_bufferSizeExt", {"hipsparseZcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrgemmNnz", {"hipsparseXcsrgemmNnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrgemm", {"hipsparseScsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm", {"hipsparseDcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm", {"hipsparseCcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm", {"hipsparseZcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcsrgemm2Nnz", {"hipsparseXcsrgemm2Nnz", "", CONV_LIB_FUNC, API_SPARSE}}, - 
{"cusparseScsrgemm2", {"hipsparseScsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm2", {"hipsparseDcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm2", {"hipsparseCcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm2", {"hipsparseZcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm2_bufferSizeExt", {"hipsparseCcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm2_bufferSizeExt", {"hipsparseZcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - // 10. cuSPARSE Preconditioners Reference - // 10.1. Incomplete Cholesky Factorization : level 0 - {"cusparseScsric0", {"hipsparseScsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric0", {"hipsparseDcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric0", {"hipsparseCcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric0", {"hipsparseZcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02_bufferSize", {"hipsparseScsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsric02_bufferSizeExt", {"hipsparseScsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_bufferSize", {"hipsparseDcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_bufferSizeExt", {"hipsparseDcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_bufferSize", {"hipsparseCcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_bufferSizeExt", {"hipsparseCcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02_bufferSize", 
{"hipsparseZcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02_bufferSizeExt", {"hipsparseZcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02_analysis", {"hipsparseScsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_analysis", {"hipsparseDcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_analysis", {"hipsparseCcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02_analysis", {"hipsparseZcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02", {"hipsparseScsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02", {"hipsparseDcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02", {"hipsparseCcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02", {"hipsparseZcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsric02_zeroPivot", {"hipsparseXcsric02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsric02_bufferSize", {"hipsparseSbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsric02_bufferSizeExt", {"hipsparseSbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_bufferSize", {"hipsparseDbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_bufferSizeExt", {"hipsparseDbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_bufferSize", {"hipsparseCbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_bufferSizeExt", {"hipsparseCbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02_bufferSize", {"hipsparseZbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseZbsric02_bufferSizeExt", {"hipsparseZbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsric02_analysis", {"hipsparseSbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_analysis", {"hipsparseDbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_analysis", {"hipsparseCbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02_analysis", {"hipsparseZbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsric02", {"hipsparseSbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02", {"hipsparseDbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02", {"hipsparseCbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02", {"hipsparseZbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsric02_zeroPivot", {"hipsparseXbsric02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.2. 
Incomplete LU Factorization: level 0 - {"cusparseScsrilu0", {"hipsparseScsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrilu0", {"hipsparseDcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrilu0", {"hipsparseCcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrilu0", {"hipsparseZcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseCsrilu0Ex", {"hipsparseCsrilu0Ex", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrilu02_numericBoost", {"hipsparseScsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrilu02_numericBoost", {"hipsparseDcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrilu02_numericBoost", {"hipsparseCcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrilu02_numericBoost", {"hipsparseZcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXcsrilu02_zeroPivot", {"hipsparseXcsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02_bufferSize", {"hipsparseScsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrilu02_bufferSizeExt", {"hipsparseScsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_bufferSize", {"hipsparseDcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_bufferSizeExt", {"hipsparseDcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_bufferSize", {"hipsparseCcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_bufferSizeExt", {"hipsparseCcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_bufferSize", {"hipsparseZcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_bufferSizeExt", {"hipsparseZcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02_analysis", 
{"hipsparseScsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_analysis", {"hipsparseDcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_analysis", {"hipsparseCcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_analysis", {"hipsparseZcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02", {"hipsparseScsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02", {"hipsparseDcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02", {"hipsparseCcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02", {"hipsparseZcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXbsric02_zeroPivot", {"hipsparseXcsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSbsrilu02_numericBoost", {"hipsparseSbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_numericBoost", {"hipsparseDbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_numericBoost", {"hipsparseCbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_numericBoost", {"hipsparseZbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02_bufferSize", {"hipsparseSbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrilu02_bufferSizeExt", {"hipsparseSbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_bufferSize", {"hipsparseDbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_bufferSizeExt", {"hipsparseDbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_bufferSize", {"hipsparseCbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_bufferSizeExt", {"hipsparseCbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_bufferSize", {"hipsparseZbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_bufferSizeExt", {"hipsparseZbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02_analysis", {"hipsparseSbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_analysis", {"hipsparseDbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_analysis", {"hipsparseCbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_analysis", {"hipsparseZbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02", {"hipsparseSbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02", {"hipsparseDbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02", {"hipsparseCbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02", {"hipsparseZbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsrilu02_zeroPivot", {"hipsparseXbsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - // 10.3. 
Tridiagonal Solve - {"cusparseSgtsv", {"hipsparseSgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv", {"hipsparseDgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv", {"hipsparseCgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv", {"hipsparseZgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv_nopivot", {"hipsparseSgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv_nopivot", {"hipsparseDgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv_nopivot", {"hipsparseCgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv_nopivot", {"hipsparseZgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_bufferSizeExt", {"hipsparseSgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_bufferSizeExt", {"hipsparseDgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_bufferSizeExt", {"hipsparseCgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_bufferSizeExt", {"hipsparseZgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2", {"hipsparseSgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2", {"hipsparseDgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2", {"hipsparseCgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2", {"hipsparseZgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_nopivot_bufferSizeExt", {"hipsparseSgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_nopivot_bufferSizeExt", {"hipsparseDgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_nopivot_bufferSizeExt", {"hipsparseCgtsv2_nopivot_bufferSizeExt", 
"", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_nopivot_bufferSizeExt", {"hipsparseZgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_nopivot", {"hipsparseSgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_nopivot", {"hipsparseDgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_nopivot", {"hipsparseCgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_nopivot", {"hipsparseZgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.4. Batched Tridiagonal Solve - {"cusparseSgtsvStridedBatch", {"hipsparseSgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvStridedBatch", {"hipsparseDgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvStridedBatch", {"hipsparseCgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvStridedBatch", {"hipsparseZgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2StridedBatch_bufferSizeExt", {"hipsparseSgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2StridedBatch_bufferSizeExt", {"hipsparseDgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2StridedBatch_bufferSizeExt", {"hipsparseCgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2StridedBatch_bufferSizeExt", {"hipsparseZgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2StridedBatch", {"hipsparseSgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2StridedBatch", {"hipsparseDgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2StridedBatch", {"hipsparseCgtsv2StridedBatch", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2StridedBatch", {"hipsparseZgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsvInterleavedBatch_bufferSizeExt", {"hipsparseSgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvInterleavedBatch_bufferSizeExt", {"hipsparseDgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvInterleavedBatch_bufferSizeExt", {"hipsparseCgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvInterleavedBatch_bufferSizeExt", {"hipsparseZgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsvInterleavedBatch", {"hipsparseSgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvInterleavedBatch", {"hipsparseDgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvInterleavedBatch", {"hipsparseCgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvInterleavedBatch", {"hipsparseZgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.5. 
Batched Pentadiagonal Solve - {"cusparseSgpsvInterleavedBatch_bufferSizeExt", {"hipsparseSgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgpsvInterleavedBatch_bufferSizeExt", {"hipsparseDgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgpsvInterleavedBatch_bufferSizeExt", {"hipsparseCgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgpsvInterleavedBatch_bufferSizeExt", {"hipsparseZgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgpsvInterleavedBatch", {"hipsparseSgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 11. cuSPARSE Matrix Reorderings Reference - {"cusparseScsrcolor", {"hipsparseScsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrcolor", {"hipsparseDcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrcolor", {"hipsparseCcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrcolor", {"hipsparseZcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 12. 
cuSPARSE Format Conversion Reference - {"cusparseSbsr2csr", {"hipsparseSbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsr2csr", {"hipsparseDbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsr2csr", {"hipsparseCbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsr2csr", {"hipsparseZbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsc_bufferSize", {"hipsparseSgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2gebsc_bufferSizeExt", {"hipsparseSgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc_bufferSize", {"hipsparseDgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc_bufferSizeExt", {"hipsparseDgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc_bufferSize", {"hipsparseCgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc_bufferSizeExt", {"hipsparseCgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc_bufferSize", {"hipsparseZgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc_bufferSizeExt", {"hipsparseZgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsc", {"hipsparseSgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc", {"hipsparseDgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc", {"hipsparseCgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc", {"hipsparseZgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsr_bufferSize", {"hipsparseSgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseSgebsr2gebsr_bufferSizeExt", {"hipsparseSgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr_bufferSize", {"hipsparseDgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr_bufferSizeExt", {"hipsparseDgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr_bufferSize", {"hipsparseCgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr_bufferSizeExt", {"hipsparseCgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr_bufferSize", {"hipsparseZgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr_bufferSizeExt", {"hipsparseZgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXgebsr2csr", {"hipsparseXgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2csr", {"hipsparseSgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2csr", {"hipsparseDgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2csr", {"hipsparseCgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2csr", {"hipsparseZgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXgebsr2gebsrNnz", {"hipsparseXgebsr2gebsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2gebsr", {"hipsparseSgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr", {"hipsparseDgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr", {"hipsparseCgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr", {"hipsparseZgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2gebsr_bufferSize", {"hipsparseScsr2gebsr_bufferSize", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2gebsr_bufferSizeExt", {"hipsparseScsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr_bufferSize", {"hipsparseDcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr_bufferSizeExt", {"hipsparseDcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr_bufferSize", {"hipsparseCcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr_bufferSizeExt", {"hipsparseCcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr_bufferSize", {"hipsparseZcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr_bufferSizeExt", {"hipsparseZcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2gebsrNnz", {"hipsparseXcsr2gebsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2gebsr", {"hipsparseScsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr", {"hipsparseDcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsc2dense", {"hipsparseScsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsc2dense", {"hipsparseDcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsc2dense", {"hipsparseCcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsc2dense", {"hipsparseZcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsc2hyb", {"hipsparseScsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsc2hyb", {"hipsparseDcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsc2hyb", {"hipsparseCcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsc2hyb", {"hipsparseZcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2bsrNnz", {"hipsparseXcsr2bsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2bsr", {"hipsparseScsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2bsr", {"hipsparseDcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2bsr", {"hipsparseCcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2bsr", {"hipsparseZcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2coo", {"hipsparseXcsr2coo", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsr2csc", {"hipsparseScsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsr2csc", {"hipsparseDcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsr2csc", {"hipsparseCcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsr2csc", {"hipsparseZcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCsr2cscEx", {"hipsparseCsr2cscEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsr2cscEx2", {"hipsparseCsr2cscEx2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsr2cscEx2_bufferSize", {"hipsparseCsr2cscEx2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2dense", {"hipsparseScsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2dense", {"hipsparseDcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2dense", {"hipsparseCcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2dense", {"hipsparseZcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2csr_compress", {"hipsparseScsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2csr_compress", {"hipsparseCcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2csr_compress", {"hipsparseZcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2hyb", {"hipsparseScsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsr2hyb", {"hipsparseDcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsr2hyb", {"hipsparseCcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsr2hyb", {"hipsparseZcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSdense2csc", {"hipsparseSdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2csc", {"hipsparseDdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2csc", {"hipsparseCdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2csc", {"hipsparseZdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSdense2csr", {"hipsparseSdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2csr", {"hipsparseDdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2csr", {"hipsparseCdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2csr", {"hipsparseZdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSdense2hyb", {"hipsparseSdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2hyb", {"hipsparseDdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2hyb", {"hipsparseCdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2hyb", {"hipsparseZdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2csc", {"hipsparseShyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2csc", {"hipsparseDhyb2csc", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2csc", {"hipsparseChyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2csc", {"hipsparseZhyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2dense", {"hipsparseShyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2dense", {"hipsparseDhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2dense", {"hipsparseChyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2dense", {"hipsparseZhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcsrsort", {"hipsparseXcsrsort", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcscsort_bufferSizeExt", {"hipsparseXcscsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - 
{"cusparseXcscsort", {"hipsparseXcscsort", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsru2csr_bufferSizeExt", {"hipsparseScsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsru2csr_bufferSizeExt", {"hipsparseDcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsru2csr_bufferSizeExt", {"hipsparseCcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsru2csr_bufferSizeExt", {"hipsparseZcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2csru", {"hipsparseScsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2csru", {"hipsparseDcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2csru", {"hipsparseCcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2csru", {"hipsparseZcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csr", {"hipsparseHpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csr", {"hipsparseSpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csr", {"hipsparseDpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csr_bufferSizeExt", {"hipsparseHpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csr_bufferSizeExt", {"hipsparseSpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csr_bufferSizeExt", {"hipsparseDpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrNnz", 
{"hipsparseHpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrNnz", {"hipsparseSpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrNnz", {"hipsparseDpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csr", {"hipsparseHpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csr", {"hipsparseSpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csr", {"hipsparseDpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csr_bufferSizeExt", {"hipsparseHpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csr_bufferSizeExt", {"hipsparseSpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csr_bufferSizeExt", {"hipsparseDpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrNnz", {"hipsparseHpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrNnz", {"hipsparseSpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrNnz", {"hipsparseDpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrByPercentage", {"hipsparseHpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrByPercentage", {"hipsparseSpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrByPercentage", {"hipsparseDpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseHpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrByPercentage_bufferSizeExt", 
{"hipsparseSpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseDpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrNnzByPercentage", {"hipsparseHpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrNnzByPercentage", {"hipsparseSpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrNnzByPercentage", {"hipsparseDpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrByPercentage", {"hipsparseHpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrByPercentage", {"hipsparseSpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrNnzByPercentage", {"hipsparseHpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrNnzByPercentage", {"hipsparseSpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrNnzByPercentage", {"hipsparseDpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSnnz_compress", 
{"hipsparseSnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnnz_compress", {"hipsparseDnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCnnz_compress", {"hipsparseCnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZnnz_compress", {"hipsparseZnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 13. cuSPARSE Generic API Reference - // Generic Sparse API helper functions - // Sparse Matrix descriptor - {"cusparseCreateCoo", {"hipsparseCreateCoo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCooAoS", {"hipsparseCreateCooAoS", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsr", {"hipsparseCreateCsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroySpMat", {"hipsparseDestroySpMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCooGet", {"hipsparseCooGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCooAoSGet", {"hipsparseCooAoSGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrGet", {"hipsparseCsrGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetFormat", {"hipsparseSpMatGetFormat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetIndexBase", {"hipsparseSpMatGetIndexBase", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetValues", {"hipsparseSpMatGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetValues", {"hipsparseSpMatSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetStridedBatch", {"hipsparseSpMatGetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetStridedBatch", {"hipsparseSpMatSetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetNumBatches", {"hipsparseSpMatSetNumBatches", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseSpMatGetNumBatches", {"hipsparseSpMatGetNumBatches", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - // Sparse Vector descriptor - {"cusparseCreateSpVec", {"hipsparseCreateSpVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroySpVec", {"hipsparseDestroySpVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGet", {"hipsparseSpVecGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGetIndexBase", {"hipsparseSpVecGetIndexBase", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGetValues", {"hipsparseSpVecGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecSetValues", {"hipsparseSpVecSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Generic Dense API helper functions - // Dense Matrix descriptor - {"cusparseCreateDnMat", {"hipsparseCreateDnMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyDnMat", {"hipsparseDestroyDnMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGet", {"hipsparseDnMatGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGetValues", {"hipsparseDnMatGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatSetValues", {"hipsparseDnMatSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatSetStridedBatch", {"hipsparseDnMatSetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGetStridedBatch", {"hipsparseDnMatGetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - // Dense Vector descriptor - {"cusparseCreateDnVec", {"hipsparseCreateDnVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyDnVec", {"hipsparseDestroyDnVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecGet", {"hipsparseDnVecGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecGetValues", {"hipsparseDnVecGetValues", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecSetValues", {"hipsparseDnVecSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Matrix * Matrix Multiplication - {"cusparseSpMM", {"hipsparseSpMM", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMM_bufferSize", {"hipsparseSpMM_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Vector * Vector Multiplication - {"cusparseSpVV", {"hipsparseSpVV", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVV_bufferSize", {"hipsparseSpVV_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Matrix * Vector Multiplication - {"cusparseSpMV", {"hipsparseSpMV", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMV_bufferSize", {"hipsparseSpMV_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp deleted file mode 100644 index aae85a50d3..0000000000 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA SPARSE API types to the corresponding HIP types -const std::map CUDA_SPARSE_TYPE_NAME_MAP{ - - // 1. Structs - {"cusparseContext", {"hipsparseContext", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseHandle_t", {"hipsparseHandle_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseHybMat", {"hipsparseHybMat", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseHybMat_t", {"hipsparseHybMat_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseMatDescr", {"hipsparseMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseMatDescr_t", {"hipsparseMatDescr_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseSolveAnalysisInfo", {"hipsparseSolveAnalysisInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSolveAnalysisInfo_t", {"hipsparseSolveAnalysisInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrsv2Info", {"csrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrsv2Info_t", {"csrsv2Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE}}, - {"csrsm2Info_t", {"csrsm2Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"bsrsv2Info", {"bsrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrsv2Info_t", {"bsrsv2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"bsrsm2Info", {"bsrsm2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrsm2Info_t", {"bsrsm2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"bsric02Info", {"bsric02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsric02Info_t", {"bsric02Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrilu02Info", {"csrilu02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"csrilu02Info_t", {"csrilu02Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"bsrilu02Info", {"bsrilu02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrilu02Info_t", {"bsrilu02Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csru2csrInfo", {"csru2csrInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csru2csrInfo_t", {"csru2csrInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrgemm2Info", {"csrgemm2Info", "", CONV_TYPE, API_SPARSE}}, - {"csrgemm2Info_t", {"csrgemm2Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseColorInfo", {"hipsparseColorInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseColorInfo_t", {"hipsparseColorInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"pruneInfo", {"pruneInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"pruneInfo_t", {"pruneInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMatDescr", {"hipsparseSpMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatDescr_t", {"hipsparseSpMatDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseDnMatDescr", {"hipsparseDnMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatDescr_t", {"hipsparseDnMatDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpVecDescr", {"hipsparseSpVecDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecDescr_t", {"hipsparseSpVecDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseDnVecDescr", {"hipsparseDnVecDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecDescr_t", {"hipsparseDnVecDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - // 2. 
Enums - {"cusparseAction_t", {"hipsparseAction_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_ACTION_SYMBOLIC", {"HIPSPARSE_ACTION_SYMBOLIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_ACTION_NUMERIC", {"HIPSPARSE_ACTION_NUMERIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseDirection_t", {"hipsparseDirection_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseHybPartition_t", {"hipsparseHybPartition_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_AUTO", {"HIPSPARSE_HYB_PARTITION_AUTO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_USER", {"HIPSPARSE_HYB_PARTITION_USER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_MAX", {"HIPSPARSE_HYB_PARTITION_MAX", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseDiagType_t", {"hipsparseDiagType_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_DIAG_TYPE_NON_UNIT", {"HIPSPARSE_DIAG_TYPE_NON_UNIT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_DIAG_TYPE_UNIT", {"HIPSPARSE_DIAG_TYPE_UNIT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseFillMode_t", {"hipsparseFillMode_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_FILL_MODE_LOWER", {"HIPSPARSE_FILL_MODE_LOWER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_FILL_MODE_UPPER", {"HIPSPARSE_FILL_MODE_UPPER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseIndexBase_t", {"hipsparseIndexBase_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_INDEX_BASE_ZERO", {"HIPSPARSE_INDEX_BASE_ZERO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_INDEX_BASE_ONE", {"HIPSPARSE_INDEX_BASE_ONE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseMatrixType_t", {"hipsparseMatrixType_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_GENERAL", {"HIPSPARSE_MATRIX_TYPE_GENERAL", "", CONV_NUMERIC_LITERAL, 
API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_SYMMETRIC", {"HIPSPARSE_MATRIX_TYPE_SYMMETRIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_HERMITIAN", {"HIPSPARSE_MATRIX_TYPE_HERMITIAN", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_TRIANGULAR", {"HIPSPARSE_MATRIX_TYPE_TRIANGULAR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseOperation_t", {"hipsparseOperation_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_OPERATION_NON_TRANSPOSE", {"HIPSPARSE_OPERATION_NON_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_OPERATION_TRANSPOSE", {"HIPSPARSE_OPERATION_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE", {"HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparsePointerMode_t", {"hipsparsePointerMode_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_POINTER_MODE_HOST", {"HIPSPARSE_POINTER_MODE_HOST", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_POINTER_MODE_DEVICE", {"HIPSPARSE_POINTER_MODE_DEVICE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseAlgMode_t", {"hipsparseAlgMode_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG0", {"CUSPARSE_ALG0", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG1", {"CUSPARSE_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG_NAIVE", {"CUSPARSE_ALG_NAIVE", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG_MERGE_PATH", {"CUSPARSE_ALG_MERGE_PATH", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSolvePolicy_t", {"hipsparseSolvePolicy_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_SOLVE_POLICY_NO_LEVEL", {"HIPSPARSE_SOLVE_POLICY_NO_LEVEL", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_SOLVE_POLICY_USE_LEVEL", {"HIPSPARSE_SOLVE_POLICY_USE_LEVEL", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseStatus_t", {"hipsparseStatus_t", "", CONV_TYPE, API_SPARSE}}, - 
{"CUSPARSE_STATUS_SUCCESS", {"HIPSPARSE_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_NOT_INITIALIZED", {"HIPSPARSE_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ALLOC_FAILED", {"HIPSPARSE_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_INVALID_VALUE", {"HIPSPARSE_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ARCH_MISMATCH", {"HIPSPARSE_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_MAPPING_ERROR", {"HIPSPARSE_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_EXECUTION_FAILED", {"HIPSPARSE_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_INTERNAL_ERROR", {"HIPSPARSE_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", {"HIPSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ZERO_PIVOT", {"HIPSPARSE_STATUS_ZERO_PIVOT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseCsr2CscAlg_t", {"hipsparseCsr2CscAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSR2CSC_ALG1", {"HIPSPARSE_CSR2CSC_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSR2CSC_ALG2", {"HIPSPARSE_CSR2CSC_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseFormat_t", {"hipsparseFormat_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_CSR", {"HIPSPARSE_FORMAT_CSR", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_CSC", {"HIPSPARSE_FORMAT_CSC", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_COO", {"HIPSPARSE_FORMAT_COO", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_COO_AOS", {"HIPSPARSE_FORMAT_COO_AOS", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - 
{"cusparseOrder_t", {"hipsparseOrder_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ORDER_COL", {"HIPSPARSE_ORDER_COL", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ORDER_ROW", {"HIPSPARSE_ORDER_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMVAlg_t", {"hipsparseSpMVAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_MV_ALG_DEFAULT", {"HIPSPARSE_MV_ALG_DEFAULT", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMV_ALG", {"HIPSPARSE_COOMV_ALG", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMV_ALG1", {"HIPSPARSE_CSRMV_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMV_ALG2", {"HIPSPARSE_CSRMV_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMMAlg_t", {"hipsparseSpMMAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_MM_ALG_DEFAULT", {"HIPSPARSE_MM_ALG_DEFAULT", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG1", {"HIPSPARSE_COOMM_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG2", {"HIPSPARSE_COOMM_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG3", {"HIPSPARSE_COOMM_ALG3", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMM_ALG1", {"HIPSPARSE_CSRMM_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseIndexType_t", {"hipsparseIndexType_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_16U", {"HIPSPARSE_INDEX_16U", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_32I", {"HIPSPARSE_INDEX_32I", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_64I", {"HIPSPARSE_INDEX_64I", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - // 3. 
Defines - {"CUSPARSE_VER_MAJOR", {"HIPSPARSE_VER_MAJOR", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_MINOR", {"HIPSPARSE_VER_MINOR", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_PATCH", {"HIPSPARSE_VER_PATCH", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_BUILD", {"HIPSPARSE_VER_BUILD", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VERSION", {"HIPSPARSE_VERSION", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Scripting.h b/hipify-clang/src/CUDA2HIP_Scripting.h deleted file mode 100644 index 76b103735b..0000000000 --- a/hipify-clang/src/CUDA2HIP_Scripting.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -extern std::set DeviceSymbolFunctions0; -extern std::set DeviceSymbolFunctions1; -extern std::set ReinterpretFunctions0; -extern std::set ReinterpretFunctions1; - -extern std::string sHIP_SYMBOL; -extern std::string s_reinterpret_cast; - -namespace perl { - - bool generate(bool Generate = true); -} - -namespace python { - - bool generate(bool Generate = true); -} diff --git a/hipify-clang/src/HipifyAction.cpp b/hipify-clang/src/HipifyAction.cpp deleted file mode 100644 index 75138c47ab..0000000000 --- a/hipify-clang/src/HipifyAction.cpp +++ /dev/null @@ -1,755 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include "HipifyAction.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Lex/HeaderSearch.h" -#include "LLVMCompat.h" -#include "CUDA2HIP.h" -#include "StringUtils.h" -#include "ArgParse.h" - -const std::string sHIP = "HIP"; -const std::string sROC = "ROC"; -const std::string sCub = "cub"; -const std::string sHipcub = "hipcub"; -const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED"; -const std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME"; -std::string sHIP_SYMBOL = "HIP_SYMBOL"; -std::string s_reinterpret_cast = "reinterpret_cast"; -const std::string sHipLaunchKernelGGL = "hipLaunchKernelGGL"; -const std::string sDim3 = "dim3("; -const std::string s_hiprand_kernel_h = "hiprand_kernel.h"; -const std::string s_hiprand_h = "hiprand.h"; -const std::string sOnce = "once"; -const std::string s_string_literal = "[string literal]"; -// CUDA identifiers, used in matchers -const std::string sCudaMemcpyToSymbol = "cudaMemcpyToSymbol"; -const std::string sCudaMemcpyToSymbolAsync = "cudaMemcpyToSymbolAsync"; -const std::string sCudaGetSymbolSize = "cudaGetSymbolSize"; -const std::string sCudaGetSymbolAddress = "cudaGetSymbolAddress"; -const std::string sCudaMemcpyFromSymbol = "cudaMemcpyFromSymbol"; -const std::string sCudaMemcpyFromSymbolAsync = "cudaMemcpyFromSymbolAsync"; -const std::string sCudaFuncSetCacheConfig = "cudaFuncSetCacheConfig"; -const std::string sCudaFuncGetAttributes = "cudaFuncGetAttributes"; -// Matchers' names -const StringRef sCudaSharedIncompleteArrayVar = "cudaSharedIncompleteArrayVar"; -const StringRef sCudaLaunchKernel = "cudaLaunchKernel"; -const StringRef sCudaHostFuncCall = "cudaHostFuncCall"; -const StringRef sCudaDeviceFuncCall = "cudaDeviceFuncCall"; -const StringRef sCubNamespacePrefix = "cubNamespacePrefix"; -const StringRef sCubFunctionTemplateDecl = 
"cubFunctionTemplateDecl"; -const StringRef sCubUsingNamespaceDecl = "cubUsingNamespaceDecl"; - -std::set DeviceSymbolFunctions0 { - {sCudaMemcpyToSymbol}, - {sCudaMemcpyToSymbolAsync} -}; - -std::set DeviceSymbolFunctions1 { - {sCudaGetSymbolSize}, - {sCudaGetSymbolAddress}, - {sCudaMemcpyFromSymbol}, - {sCudaMemcpyFromSymbolAsync} -}; - -std::set ReinterpretFunctions{ - {sCudaFuncSetCacheConfig}, - {sCudaFuncGetAttributes} -}; - -std::set ReinterpretFunctions0{ - {sCudaFuncSetCacheConfig} -}; - -std::set ReinterpretFunctions1{ - {sCudaFuncGetAttributes} -}; - -void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) { - auto &SM = getCompilerInstance().getSourceManager(); - size_t begin = 0; - while ((begin = s.find("cu", begin)) != StringRef::npos) { - const size_t end = s.find_first_of(" ", begin + 4); - StringRef name = s.slice(begin, end); - const auto found = CUDA_RENAMES_MAP().find(name); - if (found != CUDA_RENAMES_MAP().end()) { - StringRef repName = Statistics::isToRoc(found->second) ? 
found->second.rocName : found->second.hipName; - hipCounter counter = {s_string_literal, "", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.supportDegree}; - Statistics::current().incrementCounter(counter, name.str()); - if (!Statistics::isUnsupported(counter)) { - clang::SourceLocation sl = start.getLocWithOffset(begin + 1); - ct::Replacement Rep(SM, sl, name.size(), repName.str()); - clang::FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); - } - } - if (end == StringRef::npos) break; - begin = end + 1; - } -} - -clang::SourceLocation HipifyAction::GetSubstrLocation(const std::string &str, const clang::SourceRange &sr) { - clang::SourceLocation sl(sr.getBegin()); - clang::SourceLocation end(sr.getEnd()); - auto &SM = getCompilerInstance().getSourceManager(); - size_t length = SM.getCharacterData(end) - SM.getCharacterData(sl); - StringRef sfull = StringRef(SM.getCharacterData(sl), length); - size_t offset = sfull.find(str); - if (offset > 0) { - sl = sl.getLocWithOffset(offset); - } - return sl; -} - -/** - * Look at, and consider altering, a given token. - * - * If it's not a CUDA identifier, nothing happens. - * If it's an unsupported CUDA identifier, a warning is emitted. - * Otherwise, the source file is updated with the corresponding hipification. - */ -void HipifyAction::RewriteToken(const clang::Token &t) { - // String literals containing CUDA references need fixing. - if (t.is(clang::tok::string_literal)) { - StringRef s(t.getLiteralData(), t.getLength()); - RewriteString(unquoteStr(s), t.getLocation()); - return; - } else if (!t.isAnyIdentifier()) { - // If it's neither a string nor an identifier, we don't care. 
- return; - } - StringRef name = t.getRawIdentifier(); - clang::SourceLocation sl = t.getLocation(); - FindAndReplace(name, sl, CUDA_RENAMES_MAP()); -} - -void HipifyAction::FindAndReplace(StringRef name, - clang::SourceLocation sl, - const std::map &repMap, - bool bReplace) { - const auto found = repMap.find(name); - if (found == repMap.end()) { - // So it's an identifier, but not CUDA? Boring. - return; - } - Statistics::current().incrementCounter(found->second, name.str()); - clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - // Warn the user about unsupported identifier. - if (Statistics::isUnsupported(found->second)) { - std::string sWarn; - Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; - sWarn = "" + sWarn; - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is unsupported in %0."); - DE.Report(sl, ID) << sWarn; - return; - } - if (!bReplace) { - return; - } - StringRef repName = Statistics::isToRoc(found->second) ? 
found->second.rocName : found->second.hipName; - auto &SM = getCompilerInstance().getSourceManager(); - ct::Replacement Rep(SM, sl, name.size(), repName.str()); - clang::FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); -} - -namespace { - -clang::SourceRange getReadRange(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - clang::SourceLocation begin = exprRange.getBegin(); - clang::SourceLocation end = exprRange.getEnd(); - bool beginSafe = !SM.isMacroBodyExpansion(begin) || clang::Lexer::isAtStartOfMacroExpansion(begin, SM, clang::LangOptions{}); - bool endSafe = !SM.isMacroBodyExpansion(end) || clang::Lexer::isAtEndOfMacroExpansion(end, SM, clang::LangOptions{}); - if (beginSafe && endSafe) { - return {SM.getFileLoc(begin), SM.getFileLoc(end)}; - } else { - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; - } -} - -clang::SourceRange getWriteRange(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - clang::SourceLocation begin = exprRange.getBegin(); - clang::SourceLocation end = exprRange.getEnd(); - // If the range is contained within a macro, update the macro definition. - // Otherwise, use the file location and hope for the best. - if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) { - return {SM.getExpansionLoc(begin), SM.getExpansionLoc(end)}; - } - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; -} - -StringRef readSourceText(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - return clang::Lexer::getSourceText(clang::CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, clang::LangOptions(), nullptr); -} - -/** - * Get a string representation of the expression `arg`, unless it's a defaulting function - * call argument, in which case get a 0. Used for building argument lists to kernel calls. 
- */ -std::string stringifyZeroDefaultedArg(clang::SourceManager &SM, const clang::Expr *arg) { - if (clang::isa(arg)) return "0"; - else return std::string(readSourceText(SM, arg->getSourceRange())); -} - -} // anonymous namespace - -bool HipifyAction::Exclude(const hipCounter &hipToken) { - switch (hipToken.type) { - case CONV_INCLUDE_CUDA_MAIN_H: - switch (hipToken.apiType) { - case API_DRIVER: - case API_RUNTIME: - if (insertedRuntimeHeader) return true; - insertedRuntimeHeader = true; - return false; - case API_BLAS: - if (insertedBLASHeader) return true; - insertedBLASHeader = true; - return false; - case API_RAND: - if (hipToken.hipName == s_hiprand_kernel_h) { - if (insertedRAND_kernelHeader) return true; - insertedRAND_kernelHeader = true; - return false; - } else if (hipToken.hipName == s_hiprand_h) { - if (insertedRANDHeader) return true; - insertedRANDHeader = true; - return false; - } - case API_DNN: - if (insertedDNNHeader) return true; - insertedDNNHeader = true; - return false; - case API_FFT: - if (insertedFFTHeader) return true; - insertedFFTHeader = true; - return false; - case API_COMPLEX: - if (insertedComplexHeader) return true; - insertedComplexHeader = true; - return false; - case API_SPARSE: - if (insertedSPARSEHeader) return true; - insertedSPARSEHeader = true; - return false; - default: - return false; - } - return false; - case CONV_INCLUDE: - if (hipToken.hipName.empty()) return true; - switch (hipToken.apiType) { - case API_RAND: - if (hipToken.hipName == s_hiprand_kernel_h) { - if (insertedRAND_kernelHeader) return true; - insertedRAND_kernelHeader = true; - } - return false; - default: - return false; - } - return false; - default: - return false; - } - return false; -} - -void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, - const clang::Token&, - StringRef file_name, - bool is_angled, - clang::CharSourceRange filename_range, - const clang::FileEntry*, StringRef, - StringRef, const clang::Module*) { - auto &SM = 
getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(hash_loc)) return; - if (!firstHeader) { - firstHeader = true; - firstHeaderLoc = hash_loc; - } - const auto found = CUDA_INCLUDE_MAP.find(file_name); - if (found == CUDA_INCLUDE_MAP.end()) return; - bool exclude = Exclude(found->second); - Statistics::current().incrementCounter(found->second, file_name.str()); - clang::SourceLocation sl = filename_range.getBegin(); - if (Statistics::isUnsupported(found->second)) { - clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header.")); - return; - } - clang::StringRef newInclude; - // Keep the same include type that the user gave. - if (!exclude) { - clang::SmallString<128> includeBuffer; - llvm::StringRef name = Statistics::isToRoc(found->second) ? found->second.rocName : found->second.hipName; - if (is_angled) newInclude = llvm::Twine("<" + name+ ">").toStringRef(includeBuffer); - else newInclude = llvm::Twine("\"" + name + "\"").toStringRef(includeBuffer); - } else { - // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. 
- sl = hash_loc; - } - const char *B = SM.getCharacterData(sl); - const char *E = SM.getCharacterData(filename_range.getEnd()); - ct::Replacement Rep(SM, sl, E - B, newInclude.str()); - insertReplacement(Rep, clang::FullSourceLoc{sl, SM}); -} - -void HipifyAction::PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) { - if (pragmaOnce) return; - auto &SM = getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(Loc)) return; - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - clang::Token tok; - PP.Lex(tok); - StringRef Text(SM.getCharacterData(tok.getLocation()), tok.getLength()); - if (Text == sOnce) { - pragmaOnce = true; - pragmaOnceLoc = tok.getEndLoc(); - } -} - -bool HipifyAction::cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result) { - auto *launchKernel = Result.Nodes.getNodeAs(sCudaLaunchKernel); - if (!launchKernel) return false; - auto *calleeExpr = launchKernel->getCallee(); - if (!calleeExpr) return false; - auto *caleeDecl = launchKernel->getDirectCallee(); - if (!caleeDecl) return false; - auto *config = launchKernel->getConfig(); - if (!config) return false; - clang::SmallString<40> XStr; - llvm::raw_svector_ostream OS(XStr); - clang::LangOptions DefaultLangOptions; - auto *SM = Result.SourceManager; - clang::SourceRange sr = calleeExpr->getSourceRange(); - std::string kern = readSourceText(*SM, sr).str(); - OS << sHipLaunchKernelGGL << "("; - if (caleeDecl->isTemplateInstantiation()) { - OS << sHIP_KERNEL_NAME << "("; - std::string cub = sCub + "::"; - std::string hipcub; - const auto found = CUDA_CUB_TYPE_NAME_MAP.find(sCub); - if (found != CUDA_CUB_TYPE_NAME_MAP.end()) { - hipcub = found->second.hipName.str() + "::"; - } else { - hipcub = sHipcub + "::"; - } - size_t pos = kern.find(cub); - while (pos != std::string::npos) { - kern.replace(pos, cub.size(), hipcub); - pos = kern.find(cub, pos + hipcub.size()); - } - } - OS << kern; - if 
(caleeDecl->isTemplateInstantiation()) OS << ")"; - OS << ", "; - // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. - // Copy the two dimensional arguments verbatim. - for (unsigned int i = 0; i < 2; ++i) { - const std::string sArg = readSourceText(*SM, config->getArg(i)->getSourceRange()).str(); - bool bDim3 = std::equal(sDim3.begin(), sDim3.end(), sArg.c_str()); - OS << (bDim3 ? "" : sDim3) << sArg << (bDim3 ? "" : ")") << ", "; - } - // The stream/memory arguments default to zero if omitted. - OS << stringifyZeroDefaultedArg(*SM, config->getArg(2)) << ", "; - OS << stringifyZeroDefaultedArg(*SM, config->getArg(3)); - // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. - int numArgs = launchKernel->getNumArgs(); - if (numArgs > 0) { - OS << ", "; - // Start of the first argument. - clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0)); - // End of the last argument. - clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1)); - OS << readSourceText(*SM, {argStart, argEnd}); - } - OS << ")"; - clang::SourceLocation launchKernelExprLocBeg = launchKernel->getExprLoc(); - clang::SourceLocation launchKernelExprLocEnd = launchKernelExprLocBeg.isMacroID() ? llcompat::getEndOfExpansionRangeForLoc(*SM, launchKernelExprLocBeg) : llcompat::getEndLoc(launchKernel); - clang::SourceLocation launchKernelEnd = llcompat::getEndLoc(launchKernel); - clang::BeforeThanCompare isBefore(*SM); - launchKernelExprLocEnd = isBefore(launchKernelEnd, launchKernelExprLocEnd) ? 
launchKernelExprLocEnd : launchKernelEnd; - clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernelExprLocBeg, launchKernelExprLocEnd}); - clang::SourceLocation launchBeg = replacementRange.getBegin(); - clang::SourceLocation launchEnd = replacementRange.getEnd(); - if (isBefore(launchBeg, launchEnd)) { - size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchBeg); - ct::Replacement Rep(*SM, launchBeg, length, OS.str()); - clang::FullSourceLoc fullSL(launchBeg, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = {sHipLaunchKernelGGL, "", ConvTypes::CONV_KERNEL_LAUNCH, ApiTypes::API_RUNTIME}; - Statistics::current().incrementCounter(counter, sCudaLaunchKernel.str()); - return true; - } - return false; -} - -bool HipifyAction::cudaSharedIncompleteArrayVar(const mat::MatchFinder::MatchResult &Result) { - auto *sharedVar = Result.Nodes.getNodeAs(sCudaSharedIncompleteArrayVar); - if (!sharedVar) return false; - // Example: extern __shared__ uint sRadix1[]; - if (!sharedVar->hasExternalFormalLinkage()) return false; - clang::QualType QT = sharedVar->getType(); - std::string typeName; - if (QT->isIncompleteArrayType()) { - const clang::ArrayType *AT = QT.getTypePtr()->getAsArrayTypeUnsafe(); - QT = AT->getElementType(); - if (QT.getTypePtr()->isBuiltinType()) { - QT = QT.getCanonicalType(); - auto *BT = clang::dyn_cast(QT); - if (BT) { - clang::LangOptions LO; - LO.CUDA = true; - clang::PrintingPolicy policy(LO); - typeName = std::string(BT->getName(policy)); - } - } else { - typeName = QT.getAsString(); - } - } - if (!typeName.empty()) { - clang::SourceLocation slStart = sharedVar->getOuterLocStart(); - clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); - auto *SM = Result.SourceManager; - size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; - std::string varName = 
sharedVar->getNameAsString(); - std::string repName = sHIP_DYNAMIC_SHARED + "(" + typeName + ", " + varName + ")"; - ct::Replacement Rep(*SM, slStart, repLength, repName); - clang::FullSourceLoc fullSL(slStart, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = {sHIP_DYNAMIC_SHARED, "", ConvTypes::CONV_EXTERN_SHARED, ApiTypes::API_RUNTIME}; - Statistics::current().incrementCounter(counter, sCudaSharedIncompleteArrayVar.str()); - return true; - } - return false; -} - -bool HipifyAction::cudaDeviceFuncCall(const mat::MatchFinder::MatchResult &Result) { - if (const clang::CallExpr *call = Result.Nodes.getNodeAs(sCudaDeviceFuncCall)) { - auto *funcDcl = call->getDirectCallee(); - if (!funcDcl) return false; - FindAndReplace(funcDcl->getDeclName().getAsString(), llcompat::getBeginLoc(call), CUDA_DEVICE_FUNC_MAP, false); - return true; - } - return false; -} - -bool HipifyAction::cubNamespacePrefix(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubNamespacePrefix)) { - clang::QualType QT = decl->getUnderlyingType(); - auto *t = QT.getTypePtr(); - if (!t) return false; - const clang::ElaboratedType *et = t->getAs(); - if (!et) return false; - const clang::NestedNameSpecifier *nns = et->getQualifier(); - if (!nns) return false; - const clang::NamespaceDecl *nsd = nns->getAsNamespace(); - if (!nsd) return false; - const clang::TypeSourceInfo *si = decl->getTypeSourceInfo(); - const clang::TypeLoc tloc = si->getTypeLoc(); - const clang::SourceRange sr = tloc.getSourceRange(); - std::string name = nsd->getDeclName().getAsString(); - FindAndReplace(name, GetSubstrLocation(name, sr), CUDA_CUB_TYPE_NAME_MAP); - return true; - } - return false; -} - -bool HipifyAction::cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubUsingNamespaceDecl)) { - if (auto nsd = decl->getNominatedNamespace()) { - FindAndReplace(nsd->getDeclName().getAsString(), 
decl->getIdentLocation(), CUDA_CUB_TYPE_NAME_MAP); - return true; - } - } - return false; -} - -bool HipifyAction::cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubFunctionTemplateDecl)) { - auto *Tparams = decl->getTemplateParameters(); - bool ret = false; - for (size_t I = 0; I < Tparams->size(); ++I) { - const clang::ValueDecl *valueDecl = dyn_cast(Tparams->getParam(I)); - if (!valueDecl) continue; - clang::QualType QT = valueDecl->getType(); - auto *t = QT.getTypePtr(); - if (!t) continue; - const clang::ElaboratedType *et = t->getAs(); - if (!et) continue; - const clang::NestedNameSpecifier *nns = et->getQualifier(); - if (!nns) continue; - const clang::NamespaceDecl *nsd = nns->getAsNamespace(); - if (!nsd) continue; - const clang::SourceRange sr = valueDecl->getSourceRange(); - std::string name = nsd->getDeclName().getAsString(); - FindAndReplace(name, GetSubstrLocation(name, sr), CUDA_CUB_TYPE_NAME_MAP); - ret = true; - } - return ret; - } - return false; -} - -bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) { - if (auto *call = Result.Nodes.getNodeAs(sCudaHostFuncCall)) { - if (!call->getNumArgs()) return false; - auto *funcDcl = call->getDirectCallee(); - if (!funcDcl) return false; - std::string sName = funcDcl->getDeclName().getAsString(); - unsigned int argNum = 0; - bool b_reinterpret = (ReinterpretFunctions.find(sName) != ReinterpretFunctions.end()) ? 
true : false; - if (DeviceSymbolFunctions0.find(sName) != DeviceSymbolFunctions0.end() || sCudaFuncSetCacheConfig == sName) { - argNum = 0; - } else if (call->getNumArgs() > 1 && (DeviceSymbolFunctions1.find(sName) != DeviceSymbolFunctions1.end() || sCudaFuncGetAttributes == sName)) { - argNum = 1; - } else { - return false; - } - clang::SmallString<40> XStr; - llvm::raw_svector_ostream OS(XStr); - clang::SourceRange sr = call->getArg(argNum)->getSourceRange(); - auto *SM = Result.SourceManager; - OS << (b_reinterpret ? s_reinterpret_cast : sHIP_SYMBOL) << "(" << readSourceText(*SM, sr) << ")"; - clang::SourceRange replacementRange = getWriteRange(*SM, { sr.getBegin(), sr.getEnd() }); - clang::SourceLocation s = replacementRange.getBegin(); - clang::SourceLocation e = replacementRange.getEnd(); - clang::LangOptions DefaultLangOptions; - size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(e, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(s); - ct::Replacement Rep(*SM, s, length, OS.str()); - clang::FullSourceLoc fullSL(s, *SM); - insertReplacement(Rep, fullSL); - return true; - } - return false; -} - -void HipifyAction::insertReplacement(const ct::Replacement &rep, const clang::FullSourceLoc &fullSL) { - llcompat::insertReplacement(*replacements, rep); - if (PrintStats) { - rep.getLength(); - Statistics::current().lineTouched(fullSL.getExpansionLineNumber()); - Statistics::current().bytesChanged(rep.getLength()); - } -} - -std::unique_ptr HipifyAction::CreateASTConsumer(clang::CompilerInstance &CI, StringRef) { - Finder.reset(new mat::MatchFinder); - // Replace the <<<...>>> language extension with a hip kernel launch - Finder->addMatcher(mat::cudaKernelCallExpr(mat::isExpansionInMainFile()).bind(sCudaLaunchKernel), this); - Finder->addMatcher( - mat::varDecl( - mat::isExpansionInMainFile(), - mat::allOf( - mat::hasAttr(clang::attr::CUDAShared), - mat::hasType(mat::incompleteArrayType()) - ) - ).bind(sCudaSharedIncompleteArrayVar), - this - 
); - Finder->addMatcher( - mat::callExpr( - mat::isExpansionInMainFile(), - mat::callee( - mat::functionDecl( - mat::hasAnyName( - sCudaGetSymbolAddress, - sCudaGetSymbolSize, - sCudaMemcpyFromSymbol, - sCudaMemcpyFromSymbolAsync, - sCudaMemcpyToSymbol, - sCudaMemcpyToSymbolAsync, - sCudaFuncSetCacheConfig, - sCudaFuncGetAttributes - ) - ) - ) - ).bind(sCudaHostFuncCall), - this - ); - Finder->addMatcher( - mat::callExpr( - mat::isExpansionInMainFile(), - mat::callee( - mat::functionDecl( - mat::anyOf( - mat::hasAttr(clang::attr::CUDADevice), - mat::hasAttr(clang::attr::CUDAGlobal) - ), - mat::unless(mat::hasAttr(clang::attr::CUDAHost)) - ) - ) - ).bind(sCudaDeviceFuncCall), - this - ); - Finder->addMatcher( - mat::typedefDecl( - mat::isExpansionInMainFile(), - mat::hasType( - mat::elaboratedType( - mat::hasQualifier( - mat::specifiesNamespace( - mat::hasName(sCub) - ) - ) - ) - ) - ).bind(sCubNamespacePrefix), - this - ); - // TODO: Maybe worth to make it more concrete based on final cubFunctionTemplateDecl - Finder->addMatcher( - mat::functionTemplateDecl( - mat::isExpansionInMainFile() - ).bind(sCubFunctionTemplateDecl), - this - ); - // TODO: Maybe worth to make it more concrete - Finder->addMatcher( - mat::usingDirectiveDecl( - mat::isExpansionInMainFile() - ).bind(sCubUsingNamespaceDecl), - this - ); - // Ownership is transferred to the caller. - return Finder->newASTConsumer(); -} - -void HipifyAction::Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) { - auto &SM = getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(Loc)) return; - StringRef Text(SM.getCharacterData(MacroNameTok.getLocation()), MacroNameTok.getLength()); - Ifndefs.insert(std::make_pair(Text.str(), MacroNameTok.getEndLoc())); -} - -void HipifyAction::EndSourceFileAction() { - // Insert the hip header, if we didn't already do it by accident during substitution. 
- if (!insertedRuntimeHeader) { - // It's not sufficient to just replace CUDA headers with hip ones, because numerous CUDA headers are - // implicitly included by the compiler. Instead, we _delete_ CUDA headers, and unconditionally insert - // one copy of the hip include into every file. - bool placeForIncludeCalculated = false; - clang::SourceLocation sl, controllingMacroLoc; - auto &SM = getCompilerInstance().getSourceManager(); - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - clang::HeaderSearch &HS = PP.getHeaderSearchInfo(); - clang::ExternalPreprocessorSource *EPL = HS.getExternalLookup(); - const clang::FileEntry *FE = SM.getFileEntryForID(SM.getMainFileID()); - const clang::IdentifierInfo *controllingMacro = HS.getFileInfo(FE).getControllingMacro(EPL); - if (controllingMacro) { - auto found = Ifndefs.find(controllingMacro->getName().str()); - if (found != Ifndefs.end()) { - controllingMacroLoc = found->second; - placeForIncludeCalculated = true; - } - } - if (pragmaOnce) { - if (placeForIncludeCalculated) sl = pragmaOnceLoc < controllingMacroLoc ? pragmaOnceLoc : controllingMacroLoc; - else sl = pragmaOnceLoc; - placeForIncludeCalculated = true; - } - if (!placeForIncludeCalculated) { - if (firstHeader) sl = firstHeaderLoc; - else sl = SM.getLocForStartOfFile(SM.getMainFileID()); - } - clang::FullSourceLoc fullSL(sl, SM); - ct::Replacement Rep(SM, sl, 0, "\n#include \n"); - insertReplacement(Rep, fullSL); - } - clang::ASTFrontendAction::EndSourceFileAction(); -} - -namespace { - -/** - * A silly little class to proxy PPCallbacks back to the HipifyAction class. 
- */ -class PPCallbackProxy : public clang::PPCallbacks { - HipifyAction &hipifyAction; - -public: - explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {} - - void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, - StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, - const clang::FileEntry *file, StringRef search_path, StringRef relative_path, - const clang::Module *imported -#if LLVM_VERSION_MAJOR > 6 - , clang::SrcMgr::CharacteristicKind FileType -#endif - ) override { - hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported); - } - - void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) override { - hipifyAction.PragmaDirective(Loc, Introducer); - } - - void Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) override { - hipifyAction.Ifndef(Loc, MacroNameTok, MD); - } -}; -} - -bool HipifyAction::BeginInvocation(clang::CompilerInstance &CI) { - llcompat::RetainExcludedConditionalBlocks(CI); - return true; -} - -void HipifyAction::ExecuteAction() { - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - auto &SM = getCompilerInstance().getSourceManager(); - // Start lexing the specified input file. - const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID()); - clang::Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts()); - RawLex.SetKeepWhitespaceMode(true); - // Perform a token-level rewrite of CUDA identifiers to hip ones. The raw-mode lexer gives us enough - // information to tell the difference between identifiers, string literals, and "other stuff". It also - // ignores preprocessor directives, so this transformation will operate inside preprocessor-deleted code. 
- clang::Token RawTok; - RawLex.LexFromRawLexer(RawTok); - while (RawTok.isNot(clang::tok::eof)) { - RewriteToken(RawTok); - RawLex.LexFromRawLexer(RawTok); - } - // Register yourself as the preprocessor callback, by proxy. - PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this))); - // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching. - clang::ASTFrontendAction::ExecuteAction(); -} - -void HipifyAction::run(const mat::MatchFinder::MatchResult &Result) { - if (cudaLaunchKernel(Result)) return; - if (cudaSharedIncompleteArrayVar(Result)) return; - if (cudaHostFuncCall(Result)) return; - if (cudaDeviceFuncCall(Result)) return; - if (cubNamespacePrefix(Result)) return; - if (cubFunctionTemplateDecl(Result)) return; - if (cubUsingNamespaceDecl(Result)) return; -} diff --git a/hipify-clang/src/HipifyAction.h b/hipify-clang/src/HipifyAction.h deleted file mode 100644 index f70d17dd0b..0000000000 --- a/hipify-clang/src/HipifyAction.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "clang/Lex/PPCallbacks.h" -#include "clang/Tooling/Tooling.h" -#include "clang/Tooling/Core/Replacement.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "ReplacementsFrontendActionFactory.h" -#include "Statistics.h" - -namespace ct = clang::tooling; -namespace mat = clang::ast_matchers; -using namespace llvm; - -/** - * A FrontendAction that hipifies CUDA programs. - */ -class HipifyAction : public clang::ASTFrontendAction, - public mat::MatchFinder::MatchCallback { -private: - ct::Replacements *replacements; - std::map Ifndefs; - std::unique_ptr Finder; - // CUDA implicitly adds its runtime header. We rewrite explicitly-provided CUDA includes with equivalent - // ones, and track - using this flag - if the result led to us including the hip runtime header. If it did - // not, we insert it at the top of the file when we finish processing it. - // This approach means we do the best it's possible to do w.r.t preserving the user's include order. - bool insertedRuntimeHeader = false; - bool insertedBLASHeader = false; - bool insertedRANDHeader = false; - bool insertedRAND_kernelHeader = false; - bool insertedDNNHeader = false; - bool insertedFFTHeader = false; - bool insertedSPARSEHeader = false; - bool insertedComplexHeader = false; - bool firstHeader = false; - bool pragmaOnce = false; - clang::SourceLocation firstHeaderLoc; - clang::SourceLocation pragmaOnceLoc; - // Rewrite a string literal to refer to hip, not CUDA. - void RewriteString(StringRef s, clang::SourceLocation start); - // Replace a CUDA identifier with the corresponding hip identifier, if applicable. 
- void RewriteToken(const clang::Token &t); - // Calculate str's SourceLocation in SourceRange sr - clang::SourceLocation GetSubstrLocation(const std::string &str, const clang::SourceRange &sr); - -public: - explicit HipifyAction(ct::Replacements *replacements): clang::ASTFrontendAction(), - replacements(replacements) {} - // MatchCallback listeners - bool cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result); - bool cudaSharedIncompleteArrayVar(const mat::MatchFinder::MatchResult &Result); - bool cudaDeviceFuncCall(const mat::MatchFinder::MatchResult &Result); - bool cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result); - bool cubNamespacePrefix(const mat::MatchFinder::MatchResult &Result); - bool cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result); - bool cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result); - // Called by the preprocessor for each include directive during the non-raw lexing pass. - void InclusionDirective(clang::SourceLocation hash_loc, - const clang::Token &include_token, - StringRef file_name, - bool is_angled, - clang::CharSourceRange filename_range, - const clang::FileEntry *file, - StringRef search_path, - StringRef relative_path, - const clang::Module *imported); - // Called by the preprocessor for each pragma directive during the non-raw lexing pass. - void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer); - // Called by the preprocessor for each ifndef directive during the non-raw lexing pass. - // Found ifndef will be used in EndSourceFileAction() for catching include guard controlling macro. - void Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD); - -protected: - // Add a Replacement for the current file. These will all be applied after executing the FrontendAction. - void insertReplacement(const ct::Replacement &rep, const clang::FullSourceLoc &fullSL); - // FrontendAction entry point. 
- void ExecuteAction() override; - // Callback before starting processing a single input; used by hipify-clang for setting Preprocessor options. - bool BeginInvocation(clang::CompilerInstance &CI) override; - // Called at the start of each new file to process. - void EndSourceFileAction() override; - // MatchCallback API entry point. Called by the AST visitor while searching the AST for things we registered an interest for. - void run(const mat::MatchFinder::MatchResult &Result) override; - std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, StringRef InFile) override; - bool Exclude(const hipCounter &hipToken); - void FindAndReplace(StringRef name, clang::SourceLocation sl, const std::map &repMap, bool bReplace = true); -}; diff --git a/hipify-clang/src/LLVMCompat.cpp b/hipify-clang/src/LLVMCompat.cpp deleted file mode 100644 index f6d74121e4..0000000000 --- a/hipify-clang/src/LLVMCompat.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "ArgParse.h" -#include "LLVMCompat.h" -#include "llvm/Support/Path.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Frontend/CompilerInstance.h" - -const std::string sHipify = "[HIPIFY] ", sConflict = "conflict: ", sError = "error: ", sWarning = "warning: "; - -namespace llcompat { - -void PrintStackTraceOnErrorSignal() { - // The signature of PrintStackTraceOnErrorSignal changed in llvm 3.9. We don't support - // anything older than 3.8, so let's specifically detect the one old version we support. -#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) - llvm::sys::PrintStackTraceOnErrorSignal(); -#else - llvm::sys::PrintStackTraceOnErrorSignal(StringRef()); -#endif -} - -ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file) { -#if LLVM_VERSION_MAJOR > 3 - // getReplacements() now returns a map from filename to Replacements - so create an entry - // for this source file and return a reference to it. - return Tool.getReplacements()[std::string(file)]; -#else - return Tool.getReplacements(); -#endif -} - -void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep) { -#if LLVM_VERSION_MAJOR > 3 - // New clang added error checking to Replacements, and *insists* that you explicitly check it. 
- llvm::consumeError(replacements.add(rep)); -#else - // In older versions, it's literally an std::set - replacements.insert(rep); -#endif -} - -void EnterPreprocessorTokenStream(clang::Preprocessor &_pp, const clang::Token *start, size_t len, bool DisableMacroExpansion) { -#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) - _pp.EnterTokenStream(start, len, false, DisableMacroExpansion); -#else - #if (LLVM_VERSION_MAJOR < 9) - _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion); - #else - _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion, false); - #endif -#endif -} - -clang::SourceLocation getBeginLoc(const clang::Stmt *stmt) { -#if LLVM_VERSION_MAJOR < 8 - return stmt->getLocStart(); -#else - return stmt->getBeginLoc(); -#endif -} - -clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc) { -#if LLVM_VERSION_MAJOR < 8 - return typeLoc.getLocStart(); -#else - return typeLoc.getBeginLoc(); -#endif -} - -clang::SourceLocation getEndLoc(const clang::Stmt *stmt) { -#if LLVM_VERSION_MAJOR < 8 - return stmt->getLocEnd(); -#else - return stmt->getEndLoc(); -#endif -} - -clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc) { -#if LLVM_VERSION_MAJOR < 8 - return typeLoc.getLocEnd(); -#else - return typeLoc.getEndLoc(); -#endif -} - -std::error_code real_path(const Twine &path, SmallVectorImpl &output, - bool expand_tilde) { -#if LLVM_VERSION_MAJOR < 5 - output.clear(); - std::string s = path.str(); - output.append(s.begin(), s.end()); - if (sys::path::is_relative(path)) { - return sys::fs::make_absolute(output); - } - return std::error_code(); -#else - return sys::fs::real_path(path, output, expand_tilde); -#endif -} - -bool pragma_once_outside_header() { -#if LLVM_VERSION_MAJOR < 4 - return false; -#else - return true; -#endif -} - -void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI) { -#if LLVM_VERSION_MAJOR > 9 - clang::PreprocessorOptions &PPOpts = CI.getPreprocessorOpts(); - 
PPOpts.RetainExcludedConditionalBlocks = !SkipExcludedPPConditionalBlocks; -#endif -} - -bool CheckCompatibility() { -#if LLVM_VERSION_MAJOR < 10 - if (SkipExcludedPPConditionalBlocks) { - llvm::errs() << "\n" << sHipify << sWarning << "Option '" << SkipExcludedPPConditionalBlocks.ArgStr.str() << "' is supported starting from LLVM version 10.0\n"; - } -#endif - return true; -} - -clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc) { -#if LLVM_VERSION_MAJOR > 6 - return SM.getExpansionRange(loc).getEnd(); -#else - return SM.getExpansionRange(loc).second; -#endif -} - -} // namespace llcompat diff --git a/hipify-clang/src/LLVMCompat.h b/hipify-clang/src/LLVMCompat.h deleted file mode 100644 index 48e008d40d..0000000000 --- a/hipify-clang/src/LLVMCompat.h +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include -#include -#include -#include -#include - -namespace ct = clang::tooling; - -extern const std::string sHipify, sConflict, sError, sWarning; - -// Things for papering over the differences between different LLVM versions. - -namespace llcompat { -/** - * The getNumArgs function on macros was rather unhelpfully renamed in clang 4.0. Its semantics - * remain unchanged, so let's be slightly ugly about it here. :D - */ -#if LLVM_VERSION_MAJOR > 4 - #define GET_NUM_ARGS() getNumParams() -#else - #define GET_NUM_ARGS() getNumArgs() -#endif - -#if LLVM_VERSION_MAJOR < 7 - #define LLVM_DEBUG(X) DEBUG(X) -#endif - -clang::SourceLocation getBeginLoc(const clang::Stmt *stmt); -clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc); - -clang::SourceLocation getEndLoc(const clang::Stmt *stmt); -clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc); - -void PrintStackTraceOnErrorSignal(); - -using namespace llvm; - -/** - * Get the replacement map for a given filename in a RefactoringTool. - * - * Older LLVM versions don't actually support multiple filenames, so everything all gets - * smushed together. It is the caller's responsibility to cope with this. - */ -ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file); - -/** - * Add a Replacement to a Replacements. - */ -void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep); - -/** - * Version-agnostic version of Preprocessor::EnterTokenStream(). 
- */ -void EnterPreprocessorTokenStream(clang::Preprocessor &_pp, - const clang::Token *start, - size_t len, - bool DisableMacroExpansion); - -std::error_code real_path(const Twine &path, SmallVectorImpl &output, - bool expand_tilde = false); - -bool pragma_once_outside_header(); - -void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI); - -bool CheckCompatibility(); - -clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc); - -} // namespace llcompat diff --git a/hipify-clang/src/ReplacementsFrontendActionFactory.h b/hipify-clang/src/ReplacementsFrontendActionFactory.h deleted file mode 100644 index 92d77655af..0000000000 --- a/hipify-clang/src/ReplacementsFrontendActionFactory.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "clang/Tooling/Tooling.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Tooling/Core/Replacement.h" - -namespace ct = clang::tooling; - -/** - * A FrontendActionFactory that propagates a set of Replacements into the FrontendAction. - * This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool. - * - * @tparam T The FrontendAction to create. - */ -template -class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory { - ct::Replacements *replacements; - -public: - explicit ReplacementsFrontendActionFactory(ct::Replacements *r): - ct::FrontendActionFactory(), - replacements(r) {} - -#if LLVM_VERSION_MAJOR < 10 - clang::FrontendAction *create() override { - return new T(replacements); - } -#else - std::unique_ptr create() override { - return std::unique_ptr(new T(replacements)); - } -#endif -}; diff --git a/hipify-clang/src/Statistics.cpp b/hipify-clang/src/Statistics.cpp deleted file mode 100644 index 1f7713cd88..0000000000 --- a/hipify-clang/src/Statistics.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "Statistics.h" -#include -#include -#include -#include "ArgParse.h" - -const char *counterNames[NUM_CONV_TYPES] = { - "error", // CONV_ERROR - "init", // CONV_INIT - "version", // CONV_VERSION - "device", // CONV_DEVICE - "context", // CONV_CONTEXT - "module", // CONV_MODULE - "memory", // CONV_MEMORY - "virtual_memory", // CONV_VIRTUAL_MEMORY - "addressing", // CONV_ADDRESSING - "stream", // CONV_STREAM - "event", // CONV_EVENT - "external_resource_interop", // CONV_EXT_RES - "stream_memory", // CONV_STREAM_MEMORY - "execution", // CONV_EXECUTION - "graph", // CONV_GRAPH - "occupancy", // CONV_OCCUPANCY - "texture", // CONV_TEXTURE - "surface", // CONV_SURFACE - "peer", // CONV_PEER - "graphics", // CONV_GRAPHICS - "profiler", // CONV_PROFILER - "openGL", // CONV_OPENGL - "D3D9", // CONV_D3D9 - "D3D10", // CONV_D3D10 - "D3D11", // CONV_D3D11 - "VDPAU", // CONV_VDPAU - "EGL", // CONV_EGL - "thread", // CONV_THREAD - "complex", // CONV_COMPLEX - "library", // CONV_LIB_FUNC - "device_library", // CONV_LIB_DEVICE_FUNC - "device_function", // CONV_DEVICE_FUNC - "include", // CONV_INCLUDE - "include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H - "type", // CONV_TYPE - "literal", // CONV_LITERAL - "numeric_literal", // CONV_NUMERIC_LITERAL - "define", // CONV_DEFINE - "extern_shared", // CONV_EXTERN_SHARED - "kernel_launch" // CONV_KERNEL_LAUNCH -}; - -const char *counterTypes[NUM_CONV_TYPES] = { - "CONV_ERROR", - "CONV_INIT", - "CONV_VERSION", - "CONV_DEVICE", - "CONV_CONTEXT", - "CONV_MODULE", - "CONV_MEMORY", - "CONV_VIRTUAL_MEMORY", - "CONV_ADDRESSING", - "CONV_STREAM", - "CONV_EVENT", - "CONV_EXT_RES", - "CONV_STREAM_MEMORY", - "CONV_EXECUTION", - "CONV_GRAPH", - "CONV_OCCUPANCY", - 
"CONV_TEXTURE", - "CONV_SURFACE", - "CONV_PEER", - "CONV_GRAPHICS", - "CONV_PROFILER", - "CONV_OPENGL", - "CONV_D3D9", - "CONV_D3D10", - "CONV_D3D11", - "CONV_VDPAU", - "CONV_EGL", - "CONV_THREAD", - "CONV_COMPLEX", - "CONV_LIB_FUNC", - "CONV_LIB_DEVICE_FUNC", - "CONV_INCLUDE", - "CONV_INCLUDE_CUDA_MAIN_H", - "CONV_TYPE", - "CONV_LITERAL", - "CONV_NUMERIC_LITERAL", - "CONV_DEFINE", - "CONV_EXTERN_SHARED", - "CONV_KERNEL_LAUNCH" -}; - -const char *apiNames[NUM_API_TYPES] = { - "CUDA Driver API", - "CUDA RT API", - "cuComplex API", - "cuBLAS API", - "cuRAND API", - "cuDNN API", - "cuFFT API", - "cuSPARSE API", - "CUB API", - "CAFFE2 API" -}; - -const char *apiTypes[NUM_API_TYPES] = { - "API_DRIVER", - "API_RUNTIME", - "API_COMPLEX", - "API_BLAS", - "API_RAND", - "API_DNN", - "API_FFT", - "API_CUB", - "API_SPARSE", - "API_CAFFE2" -}; - -namespace { - -template -void conditionalPrint(ST *stream1, - ST2* stream2, - const std::string& s1, - const std::string& s2) { - if (stream1) { - *stream1 << s1; - } - if (stream2) { - *stream2 << s2; - } -} - -// Print a named stat value to both the terminal and the CSV file. 
-template -void printStat(std::ostream *csv, llvm::raw_ostream* printOut, const std::string &name, T value) { - if (printOut) { - *printOut << " " << name << ": " << value << "\n"; - } - if (csv) { - *csv << name << ";" << value << "\n"; - } -} - -} // Anonymous namespace - -void StatCounter::incrementCounter(const hipCounter &counter, const std::string &name) { - counters[name]++; - apiCounters[(int) counter.apiType]++; - convTypeCounters[(int) counter.type]++; -} - -void StatCounter::add(const StatCounter &other) { - for (const auto &p : other.counters) { - counters[p.first] += p.second; - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - apiCounters[i] += other.apiCounters[i]; - } - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - convTypeCounters[i] += other.convTypeCounters[i]; - } -} - -int StatCounter::getConvSum() { - int acc = 0; - for (const int &i : convTypeCounters) { - acc += i; - } - return acc; -} - -void StatCounter::print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string &prefix) { - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (convTypeCounters[i] > 0) { - conditionalPrint(csv, printOut, "\nCUDA ref type;Count\n", "[HIPIFY] info: " + prefix + " refs by type:\n"); - break; - } - } - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (convTypeCounters[i] > 0) { - printStat(csv, printOut, counterNames[i], convTypeCounters[i]); - } - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - if (apiCounters[i] > 0) { - conditionalPrint(csv, printOut, "\nCUDA API;Count\n", "[HIPIFY] info: " + prefix + " refs by API:\n"); - break; - } - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - if (apiCounters[i] > 0) { - printStat(csv, printOut, apiNames[i], apiCounters[i]); - } - } - if (counters.size() > 0) { - conditionalPrint(csv, printOut, "\nCUDA ref name;Count\n", "[HIPIFY] info: " + prefix + " refs by names:\n"); - for (const auto &it : counters) { - printStat(csv, printOut, it.first, it.second); - } - } -} - -Statistics::Statistics(const std::string 
&name): fileName(name) { - // Compute the total bytes/lines in the input file. - std::ifstream src_file(name, std::ios::binary | std::ios::ate); - src_file.clear(); - src_file.seekg(0); - totalLines = (unsigned) std::count(std::istreambuf_iterator(src_file), std::istreambuf_iterator(), '\n'); - totalBytes = (int) src_file.tellg(); - if (totalBytes < 0) { - totalBytes = 0; - } - startTime = chr::steady_clock::now(); -} - -///////// Counter update routines ////////// - -void Statistics::incrementCounter(const hipCounter &counter, const std::string &name) { - if (Statistics::isUnsupported(counter)) { - unsupported.incrementCounter(counter, name); - } else { - supported.incrementCounter(counter, name); - } -} - -void Statistics::add(const Statistics &other) { - supported.add(other.supported); - unsupported.add(other.unsupported); - touchedBytes += other.touchedBytes; - totalBytes += other.totalBytes; - touchedLines += other.touchedLines; - totalLines += other.totalLines; - if (other.hasErrors && !hasErrors) { - hasErrors = true; - } - if (startTime > other.startTime) { - startTime = other.startTime; - } -} - -void Statistics::lineTouched(int lineNumber) { - touchedLinesSet.insert(lineNumber); - touchedLines = unsigned(touchedLinesSet.size()); -} - -void Statistics::bytesChanged(int bytes) { - touchedBytes += bytes; -} - -void Statistics::markCompletion() { - completionTime = chr::steady_clock::now(); -} - -///////// Output functions ////////// - -void Statistics::print(std::ostream* csv, llvm::raw_ostream* printOut, bool skipHeader) { - if (!skipHeader) { - std::string str = "file \'" + fileName + "\' statistics:\n"; - conditionalPrint(csv, printOut, "\n" + str, "\n[HIPIFY] info: " + str); - } - if (hasErrors || totalBytes <= 0 || totalLines <= 0) { - std::string str = "\n ERROR: Statistics is invalid due to failed hipification.\n\n"; - conditionalPrint(csv, printOut, str, str); - } - // Total number of (un)supported refs that were converted. 
- int supportedSum = supported.getConvSum(); - int unsupportedSum = unsupported.getConvSum(); - int allSum = supportedSum + unsupportedSum; - printStat(csv, printOut, "CONVERTED refs count", supportedSum); - printStat(csv, printOut, "UNCONVERTED refs count", unsupportedSum); - printStat(csv, printOut, "CONVERSION %", 100 - (0 == allSum ? 100 : std::lround(double(unsupportedSum * 100) / double(allSum)))); - printStat(csv, printOut, "REPLACED bytes", touchedBytes); - printStat(csv, printOut, "TOTAL bytes", totalBytes); - printStat(csv, printOut, "CHANGED lines of code", touchedLines); - printStat(csv, printOut, "TOTAL lines of code", totalLines); - printStat(csv, printOut, "CODE CHANGED (in bytes) %", 0 == totalBytes ? 0 : std::lround(double(touchedBytes * 100) / double(totalBytes))); - printStat(csv, printOut, "CODE CHANGED (in lines) %", 0 == totalLines ? 0 : std::lround(double(touchedLines * 100) / double(totalLines))); - typedef std::chrono::duration duration; - duration elapsed = completionTime - startTime; - std::stringstream stream; - stream << std::fixed << std::setprecision(2) << elapsed.count() / 1000; - printStat(csv, printOut, "TIME ELAPSED s", stream.str()); - supported.print(csv, printOut, "CONVERTED"); - unsupported.print(csv, printOut, "UNCONVERTED"); -} - -void Statistics::printAggregate(std::ostream *csv, llvm::raw_ostream* printOut) { - Statistics globalStats = getAggregate(); - // A file is considered "converted" if we made any changes to it. 
- int convertedFiles = 0; - for (const auto &p : stats) { - if (p.second.touchedLines && p.second.totalBytes && - p.second.totalLines && !p.second.hasErrors) { - convertedFiles++; - } - } - globalStats.markCompletion(); - globalStats.print(csv, printOut); - std::string str = "TOTAL statistics:"; - conditionalPrint(csv, printOut, "\n" + str + "\n", "\n[HIPIFY] info: " + str + "\n"); - printStat(csv, printOut, "CONVERTED files", convertedFiles); - printStat(csv, printOut, "PROCESSED files", stats.size()); -} - -//// Static state management //// - -Statistics Statistics::getAggregate() { - Statistics globalStats("GLOBAL"); - for (const auto &p : stats) { - globalStats.add(p.second); - } - return globalStats; -} - -Statistics &Statistics::current() { - assert(Statistics::currentStatistics); - return *Statistics::currentStatistics; -} - -void Statistics::setActive(const std::string &name) { - stats.emplace(std::make_pair(name, Statistics{name})); - Statistics::currentStatistics = &stats.at(name); -} - -bool Statistics::isToRoc(const hipCounter &counter) { - return TranslateToRoc && counter.apiType == API_BLAS; -} - -bool Statistics::isHipUnsupported(const hipCounter &counter) { - return HIP_UNSUPPORTED == (counter.supportDegree & HIP_UNSUPPORTED); -} - -bool Statistics::isRocUnsupported(const hipCounter &counter) { - return ROC_UNSUPPORTED == (counter.supportDegree & ROC_UNSUPPORTED); -} - -bool Statistics::isUnsupported(const hipCounter &counter) { - if (Statistics::isToRoc(counter)) { - return Statistics::isRocUnsupported(counter); - } else { - return Statistics::isHipUnsupported(counter); - } -} - -std::map Statistics::stats = {}; -Statistics* Statistics::currentStatistics = nullptr; diff --git a/hipify-clang/src/Statistics.h b/hipify-clang/src/Statistics.h deleted file mode 100644 index 6cff9cd9d6..0000000000 --- a/hipify-clang/src/Statistics.h +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace chr = std::chrono; - -enum ConvTypes { - // Driver API: 5.2. Error Handling - // Runtime API: 5.3. Error Handling - CONV_ERROR = 0, - // Driver API : 5.3. Initialization - CONV_INIT, - // Driver API : 5.4. Version Management - // Runtime API: 5.28. Version Management - CONV_VERSION, - // Driver API : 5.5. Device Management, 5.6. Device Management [DEPRECATED] - // Runtime API: 5.1. Device Management - CONV_DEVICE, - // Driver API : 5.7. Primary Context Management, 5.8.Context Management, 5.9. Context Management [DEPRECATED] - CONV_CONTEXT, - // Driver API : 5.10. Module Management - CONV_MODULE, - // Driver API : 5.11. Memory Management - // Runtime API: 5.9. Memory Management, 5.10. Memory Management [DEPRECATED] - CONV_MEMORY, - // Driver API : 5.12. Virtual Memory Management - CONV_VIRTUAL_MEMORY, - // Driver API : 5.13. 
Unified Addressing - // Runtime API: 5.11. Unified Addressing - CONV_ADDRESSING, - // Driver API : 5.14. Stream Management - // Runtime API: 5.4. Stream Management - CONV_STREAM, - // Driver API : 5.15. Event Management - // Runtime API: 5.5. Event Management - CONV_EVENT, - // Driver API : 5.16. External Resource Interoperability - // Runtime API: 5.6.External Resource Interoperability - CONV_EXT_RES, - // Driver API : 5.17. Stream memory operations - CONV_STREAM_MEMORY, - // Driver API : 5.18. Execution Control, 5.19. Execution Control [DEPRECATED] - // Runtime API: 5.7.Execution Control, Former 5.9. Execution Control [DEPRECATED] - CONV_EXECUTION, - // Driver API : 5.20. Graph Management - // Runtime API: 5.29. Graph Management - CONV_GRAPH, - // Driver API : 5.21. Occupancy - // Runtime API: 5.8. Occupancy - CONV_OCCUPANCY, - // Driver API : 5.22. Texture Reference Management [DEPRECATED], 5.24. Texture Object Management - // Runtime API: 5.24. Texture Reference Management [DEPRECATED], 5.26. Texture Object Management - CONV_TEXTURE, - // Driver API : 5.23. Surface Reference Management [DEPRECATED], 5.25. Surface Object Management - // Runtime API: 5.25. Surface Reference Management [DEPRECATED], 5.27. Surface Object Management - CONV_SURFACE, - // Driver API : 5.26. Peer Context Memory Access - // Runtime API: 5.12. Peer Device Memory Access - CONV_PEER, - // Driver API : 5.27. Graphics Interoperability - // Runtime API: 5.23. Graphics Interoperability - CONV_GRAPHICS, - // Driver API : 5.28. Profiler Control - // Runtime API: 5.32. Profiler Control - CONV_PROFILER, - // Driver API : 5.29. OpenGL Interoperability - // Runtime API: 5.13. OpenGL Interoperability, 5.14. OpenGL Interoperability [DEPRECATED] - CONV_OPENGL, - // Driver API : 5.30. Direct3D 9 Interoperability - // Runtime API: 5.15. Direct3D 9 Interoperability, 5.16. Direct3D 9 Interoperability [DEPRECATED] - CONV_D3D9, - // Driver API : 5.31. Direct3D 10 Interoperability - // Runtime API: 5.17. 
Direct3D 10 Interoperability, 5.18. Direct3D 10 Interoperability [DEPRECATED] - CONV_D3D10, - // Driver API : 5.32. Direct3D 11 Interoperability - // Runtime API: 5.19. Direct3D 11 Interoperability, 5.20. Direct3D 11 Interoperability [DEPRECATED] - CONV_D3D11, - // Driver API : 5.33. VDPAU Interoperability - // Runtime API: 5.21. VDPAU Interoperability - CONV_VDPAU, - // Driver API : 5.34. EGL Interoperability - // Runtime API: 5.22. EGL Interoperability - CONV_EGL, - // Runtime API: 5.2. Thread Management [DEPRECATED] - CONV_THREAD, - CONV_COMPLEX, - CONV_LIB_FUNC, - CONV_LIB_DEVICE_FUNC, - CONV_DEVICE_FUNC, - CONV_INCLUDE, - CONV_INCLUDE_CUDA_MAIN_H, - CONV_TYPE, - CONV_LITERAL, - CONV_NUMERIC_LITERAL, - CONV_DEFINE, - CONV_EXTERN_SHARED, - CONV_KERNEL_LAUNCH, - CONV_LAST -}; -constexpr int NUM_CONV_TYPES = (int) ConvTypes::CONV_LAST; - -enum ApiTypes { - API_DRIVER = 0, - API_RUNTIME, - API_COMPLEX, - API_BLAS, - API_RAND, - API_DNN, - API_FFT, - API_SPARSE, - API_CUB, - API_CAFFE2, - API_LAST -}; -constexpr int NUM_API_TYPES = (int) ApiTypes::API_LAST; - -enum SupportDegree { - FULL = 0, - HIP_UNSUPPORTED = 1, - ROC_UNSUPPORTED = 2, - UNSUPPORTED = 3 -}; - -// The names of various fields in in the statistics reports. -extern const char *counterNames[NUM_CONV_TYPES]; -extern const char *counterTypes[NUM_CONV_TYPES]; -extern const char *apiNames[NUM_API_TYPES]; -extern const char *apiTypes[NUM_API_TYPES]; - -struct hipCounter { - llvm::StringRef hipName; - llvm::StringRef rocName; - ConvTypes type; - ApiTypes apiType; - SupportDegree supportDegree; -}; - -/** - * Tracks a set of named counters, as well as counters for each of the type enums defined above. - */ -class StatCounter { -private: - // Each thing we track is either "supported" or "unsupported"... 
- std::map counters; - int apiCounters[NUM_API_TYPES] = {}; - int convTypeCounters[NUM_CONV_TYPES] = {}; - -public: - void incrementCounter(const hipCounter &counter, const std::string &name); - // Add the counters from `other` onto the counters of this object. - void add(const StatCounter &other); - int getConvSum(); - void print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string &prefix); -}; - -/** - * Tracks the statistics for a single input file. - */ -class Statistics { - StatCounter supported; - StatCounter unsupported; - std::string fileName; - std::set touchedLinesSet = {}; - unsigned touchedLines = 0; - unsigned totalLines = 0; - unsigned touchedBytes = 0; - int totalBytes = 0; - chr::steady_clock::time_point startTime; - chr::steady_clock::time_point completionTime; - -public: - Statistics(const std::string &name); - void incrementCounter(const hipCounter &counter, const std::string &name); - // Add the counters from `other` onto the counters of this object. - void add(const Statistics &other); - void lineTouched(int lineNumber); - void bytesChanged(int bytes); - // Set the completion timestamp to now. - void markCompletion(); - -public: - /** - * Pretty-print the statistics stored in this object. - * - * @param csv Pointer to an output stream for the CSV to write. If null, no CSV is written - * @param printOut Pointer to an output stream to print human-readable textual stats to. If null, no - * such stats are produced. - */ - void print(std::ostream* csv, llvm::raw_ostream* printOut, bool skipHeader = false); - // Print aggregated statistics for all registered counters. - static void printAggregate(std::ostream *csv, llvm::raw_ostream* printOut); - // The Statistics for each input file. - static std::map stats; - // The Statistics objects for the currently-being-processed input file. - static Statistics* currentStatistics; - // Aggregate statistics over all entries in `stats` and return the resulting Statistics object. 
- static Statistics getAggregate(); - /** - * Convenient global entry point for updating the "active" Statistics. Since we operate single-threadedly - * processing one file at a time, this allows us to simply expose the stats for the current file globally, - * simplifying things. - */ - static Statistics ¤t(); - /** - * Set the active Statistics object to the named one, creating it if necessary, and write the completion - * timestamp into the currently active one. - */ - static void setActive(const std::string &name); - // Check the counter and option TranslateToRoc whether it should be translated to Roc or not. - static bool isToRoc(const hipCounter &counter); - // Check whether the counter is HIP_UNSUPPORTED or not. - static bool isHipUnsupported(const hipCounter &counter); - // Check whether the counter is ROC_UNSUPPORTED or not. - static bool isRocUnsupported(const hipCounter &counter); - /** - * Check whether the counter is ROC_UNSUPPORTED/HIP_UNSUPPORTED/UNSUPPORTED or not - * based on counter's API_TYPE and option TranslateToRoc. - */ - static bool isUnsupported(const hipCounter &counter); - // Set this flag in case of hipification errors - bool hasErrors = false; -}; diff --git a/hipify-clang/src/StringUtils.cpp b/hipify-clang/src/StringUtils.cpp deleted file mode 100644 index 31ab331174..0000000000 --- a/hipify-clang/src/StringUtils.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "llvm/ADT/SmallString.h" - -using namespace llvm; - -llvm::StringRef unquoteStr(llvm::StringRef s) { - if (s.size() > 1 && s.front() == '"' && s.back() == '"') { - return s.substr(1, s.size() - 2); - } - return s; -} - -void removePrefixIfPresent(std::string &s, const std::string &prefix) { - if (s.find(prefix) != 0) { - return; - } - s.erase(0, prefix.size()); -} - -std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC) { - if (sFile.empty()) { - return sFile; - } - if (!sys::fs::exists(sFile)) { - llvm::errs() << "\n" << sHipify << sError << "source file: " << sFile << " doesn't exist\n"; - EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), std::generic_category()); - return ""; - } - SmallString<256> fileAbsPath; - EC = llcompat::real_path(sFile, fileAbsPath, true); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": source file: " << sFile << "\n"; - return ""; - } - EC = std::error_code(); - return fileAbsPath.c_str(); -} - -std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, - const std::string &sDirType, bool bCreateDir) { - if (sDir.empty()) { - return sDir; - } - EC = std::error_code(); - SmallString<256> dirAbsPath; - if (sys::fs::exists(sDir)) { - if (sys::fs::is_regular_file(sDir)) { - llvm::errs() << "\n" << sHipify << sError << sDir << " is not a directory\n"; - EC = std::error_code(static_cast(std::errc::not_a_directory), std::generic_category()); - return ""; - } - } - else { - if (bCreateDir) { - EC = sys::fs::create_directory(sDir); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; - return ""; - } - } - else { - llvm::errs() << "\n" << sHipify << sError << sDirType << " directory: " << sDir << " doesn't exist\n"; - EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), 
std::generic_category()); - return ""; - } - } - EC = llcompat::real_path(sDir, dirAbsPath, true); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; - return ""; - } - return dirAbsPath.c_str(); -} - diff --git a/hipify-clang/src/StringUtils.h b/hipify-clang/src/StringUtils.h deleted file mode 100644 index ecbca5e832..0000000000 --- a/hipify-clang/src/StringUtils.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include -#include "llvm/ADT/StringRef.h" - -/** - * Remove double-quotes from the start/end of a string, if present. - */ -llvm::StringRef unquoteStr(llvm::StringRef s); - -/** - * If `s` starts with `prefix`, remove it. Otherwise, does nothing. 
- */ -void removePrefixIfPresent(std::string &s, const std::string &prefix); - -/** - * Returns Absolute File Path based on filename, otherwise - error. - */ -std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC); - -/** - * Returns Absolute Directory Path based on directory name, otherwise - error; - * by default the directory is temporary and created. - */ -std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, - const std::string &sDirType = "temporary", bool bCreateDir = true); diff --git a/hipify-clang/src/main.cpp b/hipify-clang/src/main.cpp deleted file mode 100644 index cb411eba2f..0000000000 --- a/hipify-clang/src/main.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "LLVMCompat.h" -#include "HipifyAction.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "llvm/Support/Debug.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticIDs.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Tool.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" - -#if LLVM_VERSION_MAJOR < 8 -#include "llvm/Support/Path.h" -#endif - -constexpr auto DEBUG_TYPE = "cuda2hip"; - -namespace ct = clang::tooling; - -void cleanupHipifyOptions(std::vector &args) { - std::vector hipifyOptions = {"-perl", "-python", "-roc", "-inplace", - "-no-backup", "-no-output", "-print-stats", - "-print-stats-csv", "-examine", "-save-temps", - "-skip-excluded-preprocessor-conditional-blocks"}; - for (const auto &a : hipifyOptions) { - args.erase(std::remove(args.begin(), args.end(), a), args.end()); - args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end()); - } - std::vector hipifyDirOptions = {"-o-dir", "-o-hipify-perl-dir", "-o-stats", - "-o-python-map-dir", "-temp-dir"}; - for (const auto &a : hipifyDirOptions) { - // remove all pairs of arguments "-option value" - auto it = args.erase(std::remove(args.begin(), args.end(), a), args.end()); - if (it != args.end()) { - args.erase(it); - } - // remove all pairs of arguments "--option value" - it = args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end()); - if (it != args.end()) { - args.erase(it); - } - // remove all "-option=value" and "--option=value" - args.erase( - std::remove_if(args.begin(), args.end(), - [a](const std::string &s) { return s.find(a + "=") == 0 || s.find("-" + a + "=") == 0; } - ), - args.end() - ); - } -} - -void sortInputFiles(int argc, const char **argv, std::vector &files) { - if (files.size() < 2) return; - 
IntrusiveRefCntPtr diagOpts(new clang::DiagnosticOptions()); - clang::TextDiagnosticPrinter diagClient(llvm::errs(), &*diagOpts); - clang::DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr(new clang::DiagnosticIDs()), &*diagOpts, &diagClient, false); - std::unique_ptr driver(new clang::driver::Driver("", "nvptx64-nvidia-cuda", Diagnostics)); - std::vector Args(argv, argv + argc); - cleanupHipifyOptions(Args); - std::unique_ptr C(driver->BuildCompilation(Args)); - std::vector sortedFiles; - for (const auto &J : C->getJobs()) { - if (std::string(J.getCreator().getName()) != "clang") continue; - const auto &JA = J.getArguments(); - for (size_t i = 0; i < JA.size(); ++i) { - const auto &A = std::string(JA[i]); - if (std::find(files.begin(), files.end(), A) != files.end() && - i > 0 && std::string(JA[i - 1]) == "-main-file-name") { - sortedFiles.push_back(A); - } - } - } - if (sortedFiles.empty()) return; - std::reverse(sortedFiles.begin(), sortedFiles.end()); - files.assign(sortedFiles.begin(), sortedFiles.end()); -} - -void appendArgumentsAdjusters(ct::RefactoringTool &Tool, const std::string &sSourceAbsPath, const char *hipify_exe) { - if (!IncludeDirs.empty()) { - for (std::string s : IncludeDirs) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-I", ct::ArgumentInsertPosition::BEGIN)); - } - } - if (!MacroNames.empty()) { - for (std::string s : MacroNames) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-D", ct::ArgumentInsertPosition::BEGIN)); - } - } - // Includes for clang's CUDA wrappers for using by packaged hipify-clang - static int Dummy; - std::string hipify = llvm::sys::fs::getMainExecutable(hipify_exe, (void *)&Dummy); - std::string clang_inc_path = std::string(llvm::sys::path::parent_path(hipify)); - 
clang_inc_path.append("/include"); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-isystem", ct::ArgumentInsertPosition::BEGIN)); - clang_inc_path.append("/cuda_wrappers"); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-isystem", ct::ArgumentInsertPosition::BEGIN)); - // Ensure at least c++11 is used. - std::string stdCpp = "-std=c++11"; -#if defined(_MSC_VER) - stdCpp = "-std=c++14"; -#endif - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(stdCpp.c_str(), ct::ArgumentInsertPosition::BEGIN)); - std::string sInclude = "-I" + sys::path::parent_path(sSourceAbsPath).str(); -#if defined(HIPIFY_CLANG_RES) - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES, ct::ArgumentInsertPosition::BEGIN)); -#endif - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sInclude.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-fno-delayed-template-parsing", ct::ArgumentInsertPosition::BEGIN)); - if (llcompat::pragma_once_outside_header()) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Wno-pragma-once-outside-header", ct::ArgumentInsertPosition::BEGIN)); - } - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN)); - if (!CudaGpuArch.empty()) { - std::string sCudaGpuArch = "--cuda-gpu-arch=" + CudaGpuArch; - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaGpuArch.c_str(), ct::ArgumentInsertPosition::BEGIN)); - } - if (!CudaPath.empty()) { - std::string sCudaPath = "--cuda-path=" + CudaPath; - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaPath.c_str(), ct::ArgumentInsertPosition::BEGIN)); 
- } - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("cuda", ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-x", ct::ArgumentInsertPosition::BEGIN)); - if (Verbose) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-v", ct::ArgumentInsertPosition::END)); - } - Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster()); -} - -bool generatePython() { - bool bToRoc = TranslateToRoc; - TranslateToRoc = true; - bool bToPython = python::generate(GeneratePython); - TranslateToRoc = bToRoc; - return bToPython; -} - -int main(int argc, const char **argv) { - std::vector new_argv(argv, argv + argc); - if (std::find(new_argv.begin(), new_argv.end(), std::string("--")) == new_argv.end()) { - new_argv.push_back("--"); - new_argv.push_back(nullptr); - argv = new_argv.data(); - argc++; - } - llcompat::PrintStackTraceOnErrorSignal(); - ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore); - if (!llcompat::CheckCompatibility()) { - return 1; - } - std::vector fileSources = OptionsParser.getSourcePathList(); - if (fileSources.empty() && !GeneratePerl && !GeneratePython) { - llvm::errs() << "\n" << sHipify << sError << "Must specify at least 1 positional argument for source file" << "\n"; - return 1; - } - if (!perl::generate(GeneratePerl)) { - llvm::errs() << "\n" << sHipify << sError << "hipify-perl generating failed" << "\n"; - return 1; - } - if (!generatePython()) { - llvm::errs() << "\n" << sHipify << sError << "hipify-python generating failed" << "\n"; - return 1; - } - if (fileSources.empty()) { - return 0; - } - std::string dst = OutputFilename, dstDir = OutputDir; - std::error_code EC; - std::string sOutputDirAbsPath = getAbsoluteDirectoryPath(OutputDir, EC, "output"); - if (EC) { - return 1; - } - if (!dst.empty()) { - if (fileSources.size() > 1) { - llvm::errs() << sHipify << sConflict << "-o and multiple source files are specified\n"; - return 1; - } 
- if (Inplace) { - llvm::errs() << sHipify << sConflict << "both -o and -inplace options are specified\n"; - return 1; - } - if (NoOutput) { - llvm::errs() << sHipify << sConflict << "both -no-output and -o options are specified\n"; - return 1; - } - if (!dstDir.empty()) { - dst = sOutputDirAbsPath + "/" + dst; - } - } - if (NoOutput && Inplace) { - llvm::errs() << sHipify << sConflict << "both -no-output and -inplace options are specified\n"; - return 1; - } - if (!dstDir.empty() && Inplace) { - llvm::errs() << sHipify << sConflict << "both -o-dir and -inplace options are specified\n"; - return 1; - } - if (Examine) { - NoOutput = PrintStats = true; - } - int Result = 0; - SmallString<128> tmpFile; - StringRef sourceFileName, ext = "hip", csv_ext = "csv"; - std::string sTmpFileName, sSourceAbsPath; - std::string sTmpDirAbsParh = getAbsoluteDirectoryPath(TemporaryDir, EC); - if (EC) { - return 1; - } - // Arguments for the Statistics print routines. - std::unique_ptr csv = nullptr; - llvm::raw_ostream *statPrint = nullptr; - bool create_csv = false; - if (!OutputStatsFilename.empty()) { - PrintStatsCSV = true; - create_csv = true; - } else { - if (PrintStatsCSV && fileSources.size() > 1) { - OutputStatsFilename = "sum_stat.csv"; - create_csv = true; - } - } - if (create_csv) { - if (!OutputDir.empty()) { - OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename; - } - csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); - } - if (PrintStats) { - statPrint = &llvm::errs(); - } - sortInputFiles(argc, argv, fileSources); - for (const auto &src : fileSources) { - // Create a copy of the file to work on. When we're done, we'll move this onto the - // output (which may mean overwriting the input, if we're in-place). - // Should we fail for some reason, we'll just leak this file and not corrupt the input. 
- sSourceAbsPath = getAbsoluteFilePath(src, EC); - if (EC) { - continue; - } - sourceFileName = sys::path::filename(sSourceAbsPath); - if (dst.empty()) { - if (Inplace) { - dst = src; - } else { - dst = src + "." + ext.str(); - if (!dstDir.empty()) { - dst = sOutputDirAbsPath + "/" + sourceFileName.str() + "." + ext.str(); - } - } - } - if (TemporaryDir.empty()) { - EC = sys::fs::createTemporaryFile(sourceFileName, ext, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - Result = 1; - continue; - } - } else { - sTmpFileName = sTmpDirAbsParh + "/" + sourceFileName.str() + "." + ext.str(); - tmpFile = sTmpFileName; - } - EC = sys::fs::copy_file(src, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << src << " to " << tmpFile << "\n"; - Result = 1; - continue; - } - if (PrintStatsCSV) { - if (OutputStatsFilename.empty()) { - OutputStatsFilename = sourceFileName.str() + "." + csv_ext.str(); - if (!OutputDir.empty()) { - OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename; - } - } - if (!csv) { - csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); - } - } - // Initialise the statistics counters for this file. - Statistics::setActive(src); - // RefactoringTool operates on the file in-place. Giving it the output path is no good, - // because that'll break relative includes, and we don't want to overwrite the input file. - // So what we do is operate on a copy, which we then move to the output. - ct::RefactoringTool Tool(OptionsParser.getCompilations(), std::string(tmpFile.c_str())); - ct::Replacements &replacementsToUse = llcompat::getReplacements(Tool, tmpFile.c_str()); - ReplacementsFrontendActionFactory actionFactory(&replacementsToUse); - appendArgumentsAdjusters(Tool, sSourceAbsPath, argv[0]); - Statistics ¤tStat = Statistics::current(); - // Hipify _all_ the things! 
- if (Tool.runAndSave(&actionFactory)) { - currentStat.hasErrors = true; - Result = 1; - LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n"); - } - // Copy the tmpfile to the output - if (!NoOutput && !currentStat.hasErrors) { - EC = sys::fs::copy_file(tmpFile, dst); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dst << "\n"; - Result = 1; - continue; - } - } - // Remove the tmp file without error check - if (!SaveTemps) { - sys::fs::remove(tmpFile); - } - Statistics::current().markCompletion(); - Statistics::current().print(csv.get(), statPrint); - dst.clear(); - } - if (fileSources.size() > 1) { - Statistics::printAggregate(csv.get(), statPrint); - } - return Result; -} diff --git a/tests/hipify-clang/lit.cfg b/tests/hipify-clang/lit.cfg deleted file mode 100644 index ea496e6a69..0000000000 --- a/tests/hipify-clang/lit.cfg +++ /dev/null @@ -1,151 +0,0 @@ -# -*- Python -*- -import os -import platform -import re -import subprocess -import struct - -import lit.formats -import lit.util - -# Configuration file for the 'lit' test runner. 
-site_cfg = lit_config.params.get('site_config', None) -lit_config.load_config(config, site_cfg) - -config.excludes = ['cmdparser.hpp'] -config.excludes.append('spatial_batch_norm_op.h') -config.excludes.append('common_cudnn.h') - -delimiter = "==============================================================="; -print(delimiter) -print("CUDA " + config.cuda_version + " - will be used for testing") -print("LLVM " + config.llvm_version + " - will be used for testing") -print(platform.machine() + " - Platform architecture") -print(platform.system() + " " + platform.release() + " - Platform OS") -print(str(config.pointer_size * 8) + " - hipify-clang binary bitness") -print(str(struct.calcsize("P") * 8) + " - python " + str(platform.python_version()) + " binary bitness") -print(delimiter) -warns = None -if not config.cuda_dnn_root: - config.excludes.append('cudnn_convolution_forward.cu') - config.excludes.append('cudnn_softmax.cu') - print("WARN: cuDNN tests are excluded due to unset CUDA_DNN_ROOT_DIR") - warns = True -if not config.cuda_cub_root: - config.excludes.append('cub_01.cu') - config.excludes.append('cub_02.cu') - config.excludes.append('cub_03.cu') - print("WARN: CUB tests are excluded due to unset CUDA_CUB_ROOT_DIR") - warns = True -if warns: - print(delimiter) - -if config.cuda_version_major == 7 and config.cuda_version_minor == 0: - config.excludes.append('headers_test_09.cu') - config.excludes.append('cudnn_convolution_forward.cu') -if config.cuda_version_major < 8: - config.excludes.append('cuSPARSE_02.cu') -if config.cuda_version_major < 9: - config.excludes.append('cuSPARSE_04.cu') - config.excludes.append('cuSPARSE_05.cu') - config.excludes.append('cuSPARSE_06.cu') - config.excludes.append('cuSPARSE_07.cu') - config.excludes.append('benchmark_curand_kernel.cpp') -if config.cuda_version_major < 10: - config.excludes.append('cuSPARSE_08.cu') - config.excludes.append('cuSPARSE_09.cu') - config.excludes.append('cuSPARSE_10.cu') - 
config.excludes.append('cuSPARSE_11.cu') - -if config.llvm_version_major < 10: - config.excludes.append('pp_if_else_conditionals_LLVM_10.cu') - config.excludes.append('pp_if_else_conditionals_01_LLVM_10.cu') - -# name: The name of this test suite. -config.name = 'hipify' - -# suffixes: CUDA source is only supported -config.suffixes = ['.cu','.cuh','.cpp','.c','.hpp','.h'] - -# testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.ShTest() - -# test_source_root: The root path where tests are located. -config.test_source_root = os.path.dirname(__file__) - -# test_exec_root: The path where tests are located (default is the test suite root). -#config.test_exec_root = config.test_source_root - -# target_triple: Used by ShTest and TclTest formats for XFAIL checks. -config.target_triple = '(unused)' - -# available_features: Used by ShTest and TclTest formats for REQUIRES checks. -config.available_features = [] - -obj_root = getattr(config, 'obj_root', None) -if obj_root is not None: - config.test_exec_root = obj_root - -if obj_root is not None: - llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) - if not llvm_tools_dir: - lit_config.fatal('No LLVM tools dir set!') - path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) - config.environment['PATH'] = path - -hipify_path = obj_root - -clang_arguments = "-v" -if sys.platform in ['win32']: - run_test_ext = ".bat" - hipify_path += "/" + config.build_type - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/common/inc" -else: - run_test_ext = ".sh" - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/samples/common/inc" -if config.pointer_size == 8: - clang_arguments += " -D__LP64__" - -# Set max clang's CudaArch for corresponding CUDA version -# to support maximum CUDA features in offline tests -if config.cuda_version_major == 7: - if config.cuda_version_minor == 5: - clang_arguments += " --cuda-gpu-arch=sm_53" - else: - clang_arguments += " --cuda-gpu-arch=sm_52" 
-elif config.cuda_version_major == 8: - clang_arguments += " --cuda-gpu-arch=sm_62" -elif config.cuda_version_major == 9: - if config.cuda_version_minor == 2: - clang_arguments += " --cuda-gpu-arch=sm_72" - else: - clang_arguments += " --cuda-gpu-arch=sm_70" -elif config.cuda_version_major == 10: - clang_arguments += " --cuda-gpu-arch=sm_75" - -# cuDNN ROOT -if config.cuda_dnn_root: - clang_arguments += " -I'%s'/include" -# CUB ROOT -if config.cuda_cub_root: - clang_arguments += " -I'%s'" - -if config.cuda_dnn_root and config.cuda_cub_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root, config.cuda_cub_root))) -elif config.cuda_dnn_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root))) -elif config.cuda_cub_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_cub_root))) -else: - config.substitutions.append(("%clang_args", clang_arguments % config.cuda_sdk_root)) - -if config.llvm_version_major < 4: - hipify_arguments = "-I'%s'/include" -else: - hipify_arguments = "--cuda-path='%s'" - -config.substitutions.append(("%hipify_args", hipify_arguments % config.cuda_root)) -config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"')) -config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"')) diff --git a/tests/hipify-clang/lit.site.cfg.in b/tests/hipify-clang/lit.site.cfg.in deleted file mode 100644 index 3c17567903..0000000000 --- a/tests/hipify-clang/lit.site.cfg.in +++ /dev/null @@ -1,38 +0,0 @@ -import sys -import os - -config.pointer_size = @CMAKE_SIZEOF_VOID_P@ -config.llvm_version = "@LLVM_PACKAGE_VERSION@" -config.llvm_version_major = int("@LLVM_VERSION_MAJOR@") -config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" -config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" -config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" 
-config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@" -config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@" -config.cuda_version_major = int("@CUDA_VERSION_MAJOR@") -config.cuda_version_minor = int("@CUDA_VERSION_MINOR@") -config.cuda_version = "@CUDA_VERSION@" -if sys.platform in ['win32']: - config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" - if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND": - cuda_version = config.cuda_version - cuda_version = cuda_version.replace('.','_') - config.cuda_samples_root = os.environ.get('NVCUDASAMPLES' + cuda_version + '_ROOT') - if not config.cuda_samples_root: - lit_config.fatal('No CUDA Samples dir set! Please set CUDA_SDK_ROOT_DIR.') - config.cuda_sdk_root = config.cuda_samples_root - config.build_type = "@CMAKE_BUILD_TYPE@" - if not config.build_type: - config.build_type = "Debug" -else: - config.cuda_sdk_root = config.cuda_root - -# Support substitution of the tools and libs dirs with user parameters. This is -# used when we can't determine the tool dir at configuration time. -try: - config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params - config.obj_root = config.obj_root % lit_config.params -except KeyError: - e = sys.exc_info()[1] - key, = e.args - lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) diff --git a/tests/hipify-clang/run_test.bat b/tests/hipify-clang/run_test.bat deleted file mode 100644 index 8b10bc8c2e..0000000000 --- a/tests/hipify-clang/run_test.bat +++ /dev/null @@ -1,21 +0,0 @@ -@echo off -setlocal - -for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i -if not defined FILE_CHECK (echo Error: FileCheck.exe not found in PATH. 
&& exit /b 1) - -set HIPIFY=%1 -set IN_FILE=%2 -set TMP_FILE=%3 -set CUDA_ROOT=%4 -set ROC=%5 - -set all_args=%* -call set clang_args=%%all_args:*%6=%% -set clang_args=%6%clang_args% - -%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% %ROC% -- %clang_args% -if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) - -findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE% -if errorlevel 1 (echo Error: FileCheck.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) diff --git a/tests/hipify-clang/run_test.sh b/tests/hipify-clang/run_test.sh deleted file mode 100755 index 357976558e..0000000000 --- a/tests/hipify-clang/run_test.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit - -# Run a single LIT test file in a magical way that preserves colour output, to work around -# a known flaw in lit. - -# Capture lit substitutions -HIPIFY=$1 -IN_FILE=$2 -TMP_FILE=$3 -CUDA_ROOT=$4 -ROC=$5 -shift 5 - -# Remaining args are the ones to forward to clang proper. - -$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT $ROC -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE diff --git a/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu b/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu deleted file mode 100644 index 6b6f4f5dde..0000000000 --- a/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include - -__global__ -void fn(float* px, float* py) { - bool a[42]; - __shared__ double b[69]; - for (auto&& x : b) x = *py++; - for (auto&& x : a) x = *px++ > 0.0; - for (auto&& x : a) if (x) *--py = *--px; -} - -int main() { - // CHECK: hipFuncCache_t cacheConfig; - cudaFuncCache cacheConfig; - void* func; - // CHECK: hipFuncSetCacheConfig(reinterpret_cast(func), cacheConfig); - cudaFuncSetCacheConfig(func, cacheConfig); - // CHECK: hipFuncAttributes attr{}; - cudaFuncAttributes attr{}; - // CHECK: auto r = hipFuncGetAttributes(&attr, reinterpret_cast(&fn)); - auto r = cudaFuncGetAttributes(&attr, &fn); - // CHECK: if (r != hipSuccess || attr.maxThreadsPerBlock == 0) { - if (r != cudaSuccess || attr.maxThreadsPerBlock == 0) { - return 1; - } - return 0; -} diff --git a/tests/hipify-clang/unit_tests/device/atomics.cu b/tests/hipify-clang/unit_tests/device/atomics.cu deleted file mode 100644 index 3089efe1b8..0000000000 --- a/tests/hipify-clang/unit_tests/device/atomics.cu +++ /dev/null @@ -1,286 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args "-Xclang" "-fcuda-allow-variadic-functions" - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// CHECK: #include "hip/hip_runtime.h" -#include "cuda_runtime.h" - -#include -#include -#include -#include -#include -#include - -#define EXIT_WAIVED 2 - -const char* sampleName = "hipSimpleAtomicsTest"; - -using namespace std; -// Auto-Verification Code -bool testResult = true; - -bool computeGoldBitwise(...) { - return true; -} - -template{}>::type* = nullptr> -bool computeGoldBitwise(T* gpuData, int len) { - T val = 0xff; - for (int i = 0; i < len; ++i) { - // 9th element should be 1 - val &= (2 * i + 7); - } - if (val != gpuData[8]) { - printf("atomicAnd failed\n"); - return false; - } - - val = 0; - for (int i = 0; i < len; ++i) { - // 10th element should be 0xff - val |= (1 << i); - } - if (val != gpuData[9]) { - printf("atomicOr failed\n"); - return false; - } - - val = 0xff; - for (int i = 0; i < len; ++i) { - // 11th element should be 0xff - val ^= i; - } - if (val != gpuData[10]) { - printf("atomicXor failed\n"); - return false; - } - - return true; -} - -template -bool computeGold(T* gpuData, int len) { - T val = 0; - for (int i = 0; i < len; ++i) { - val += 10; - } - if (val != gpuData[0]) { - printf("atomicAdd failed\n"); - return false; - } - - val = 0; - for (int i = 0; i < len; ++i) { - val -= 10; - } - if (val != gpuData[1]) { - printf("atomicSub failed\n"); - return false; - } - - bool found = false; - for (T i = 0; i < len; ++i) { - // third element should be a member of [0, len) - if (i == gpuData[2]) { - found = true; - break; - } - } - if (!found) { - printf("atomicExch 
failed\n"); - return false; - } - - val = -(1 << 8); - for (T i = 0; i < len; ++i) { - // fourth element should be len-1 - val = max(val, i); - } - if (val != gpuData[3]) { - printf("atomicMax failed\n"); - return false; - } - - val = 1 << 8; - for (T i = 0; i < len; ++i) { - val = min(val, i); - } - if (val != gpuData[4]) { - printf("atomicMin failed\n"); - return false; - } - - int limit = 17; - val = 0; - for (int i = 0; i < len; ++i) { - val = (val >= limit) ? 0 : val + 1; - } - if (val != gpuData[5]) { - printf("atomicInc failed\n"); - return false; - } - - limit = 137; - val = 0; - for (int i = 0; i < len; ++i) { - val = ((val == 0) || (val > limit)) ? limit : val - 1; - } - if (val != gpuData[6]) { - printf("atomicDec failed\n"); - return false; - } - - found = false; - for (T i = 0; i < len; ++i) { - // eighth element should be a member of [0, len) - if (i == gpuData[7]) { - found = true; - break; - } - } - if (!found) { - printf("atomicCAS failed\n"); - return false; - } - - return computeGoldBitwise(gpuData, len); -} - -__device__ -void testKernelExch(...) {} - -template{}>::type* = nullptr> -__device__ -void testKernelExch(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - // Atomic exchange - atomicExch(&g_odata[2], tid); -} - -__device__ -void testKernelSub(...) {} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -__device__ -void testKernelSub(T* g_odata) { - // Atomic subtraction (final should be 0) - atomicSub(&g_odata[1], 10); -} - -__device__ -void testKernelIntegral(...) 
{} - -template{}>::type* = nullptr> -__device__ -void testKernelIntegral(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - // Atomic maximum - atomicMax(&g_odata[3], tid); - // Atomic minimum - atomicMin(&g_odata[4], tid); - // Atomic increment (modulo 17+1) - atomicInc((unsigned int*)&g_odata[5], 17); - // Atomic decrement - atomicDec((unsigned int*)&g_odata[6], 137); - // Atomic compare-and-swap - atomicCAS(&g_odata[7], tid - 1, tid); - // Atomic AND - atomicAnd(&g_odata[8], 2 * tid + 7); - // Atomic OR - atomicOr(&g_odata[9], 1 << tid); - // Atomic XOR - atomicXor(&g_odata[10], tid); - testKernelSub(g_odata); -} - -template -__global__ void testKernel(T* g_odata) { - // Atomic addition - atomicAdd(&g_odata[0], 10); - testKernelIntegral(g_odata); - testKernelExch(g_odata); -} - -template -void runTest() { - // CHECK: hipDeviceProp_t deviceProp; - cudaDeviceProp deviceProp; - deviceProp.major = 0; - deviceProp.minor = 0; - int dev = 0; - // CHECK: hipGetDeviceProperties(&deviceProp, dev); - cudaGetDeviceProperties(&deviceProp, dev); - // Statistics about the GPU device - printf( - "> GPU device has %d Multi-Processors, " - "SM %d.%d compute capabilities\n\n", - deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor); - unsigned int numThreads = 256; - unsigned int numBlocks = 64; - unsigned int numData = 11; - unsigned int memSize = sizeof(T) * numData; - - // Allocate mem for the result on host side - T* hOData = (T*)malloc(memSize); - // Initialize the memory - for (unsigned int i = 0; i < numData; i++) hOData[i] = 0; - // To make the AND and XOR tests generate something other than 0... 
- hOData[8] = hOData[10] = 0xff; - // Allocate device memory for result - T* dOData; - // CHECK: hipMalloc((void**)&dOData, memSize); - cudaMalloc((void**)&dOData, memSize); - // Copy host memory to device to initialize to zero - // CHECK: hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice); - cudaMemcpy(dOData, hOData, memSize, cudaMemcpyHostToDevice); - // Execute the kernel - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData); - testKernel<<>>(dOData); - // Copy result from device to host - // CHECK: hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost); - cudaMemcpy(hOData, dOData, memSize, cudaMemcpyDeviceToHost); - // Compute reference solution - testResult = computeGold(hOData, numThreads * numBlocks); - // Cleanup memory - free(hOData); - // CHECK: hipFree(dOData); - cudaFree(dOData); -} - -int main(int argc, char** argv) { - printf("%s starting...\n", sampleName); - runTest(); - runTest(); - runTest(); - runTest(); -#if CUDA_VERSION >= 8000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 - runTest(); -#endif - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!"); - exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/tests/hipify-clang/unit_tests/device/device_symbols.cu b/tests/hipify-clang/unit_tests/device/device_symbols.cu deleted file mode 100644 index b58abeda46..0000000000 --- a/tests/hipify-clang/unit_tests/device/device_symbols.cu +++ /dev/null @@ -1,152 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include -#include -#include - -#define NUM 1024 -#define SIZE 1024 * 4 - -__device__ int globalIn[NUM]; -__device__ int globalOut[NUM]; - -__global__ void Assign(int* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Out[tid] = globalIn[tid]; - globalOut[tid] = globalIn[tid]; -} - -__device__ __constant__ int globalConst[NUM]; - -__global__ void checkAddress(int* addr, bool* out) { - *out = (globalConst == addr); -} - -int main() { - int *A, *Am, *B, *Ad, *C, *Cm; - A = new int[NUM]; - B = new int[NUM]; - C = new int[NUM]; - for (int i = 0; i < NUM; ++i) { - A[i] = -1 * i; - B[i] = 0; - C[i] = 0; - } - // CHECK: hipMalloc((void**)&Ad, SIZE); - cudaMalloc((void**)&Ad, SIZE); - // CHECK: hipHostMalloc((void**)&Am, SIZE); - cudaMallocHost((void**)&Am, SIZE); - // CHECK: hipHostMalloc((void**)&Cm, SIZE); - cudaMallocHost((void**)&Cm, SIZE); - for (int i = 0; i < NUM; ++i) { - Am[i] = -1 * i; - Cm[i] = 0; - } - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipStreamCreate(&stream); - cudaStreamCreate(&stream); - // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream); - cudaMemcpyToSymbolAsync(globalIn, Am, SIZE, 0, cudaMemcpyHostToDevice, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - cudaMemcpyFromSymbolAsync(Cm, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - for (int i = 0; i < NUM; ++i) { - assert(Am[i] == B[i]); - assert(Am[i] == Cm[i]); - } - for (int i = 0; i < NUM; ++i) { - A[i] = -2 * i; - B[i] = 0; - } - // CHECK: 
hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice); - cudaMemcpyToSymbol(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost); - cudaMemcpyFromSymbol(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost); - for (int i = 0; i < NUM; ++i) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - for (int i = 0; i < NUM; ++i) { - A[i] = -3 * i; - B[i] = 0; - } - // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream); - cudaMemcpyToSymbolAsync(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - cudaMemcpyFromSymbolAsync(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - for (int i = 0; i < NUM; ++i) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - bool *checkOkD; - bool checkOk = false; - size_t symbolSize = 0; - int *symbolAddress; - // CHECK: hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst)); - cudaGetSymbolSize(&symbolSize, globalConst); - // CHECK: hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst)); - cudaGetSymbolAddress((void**) &symbolAddress, globalConst); - // CHECK: hipMalloc((void**)&checkOkD, sizeof(bool)); - cudaMalloc((void**)&checkOkD, sizeof(bool)); - // CHECK: hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), 
dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); - checkAddress<<>>(symbolAddress, checkOkD); - // CHECK: hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost); - cudaMemcpy(&checkOk, checkOkD, sizeof(bool), cudaMemcpyDeviceToHost); - // CHECK: hipFree(checkOkD); - cudaFree(checkOkD); - assert(checkOk); - assert(symbolSize == SIZE); - // CHECK: hipHostFree(Am); - cudaFreeHost(Am); - // CHECK: hipHostFree(Cm); - cudaFreeHost(Cm); - // CHECK: hipFree(Ad); - cudaFree(Ad); - delete[] A; - delete[] B; - delete[] C; - return 0; -} diff --git a/tests/hipify-clang/unit_tests/device/math_functions.cu b/tests/hipify-clang/unit_tests/device/math_functions.cu deleted file mode 100644 index c833ca0182..0000000000 --- a/tests/hipify-clang/unit_tests/device/math_functions.cu +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones. -// ToDo: change lit testing in order to parse the output. - -#define LEN 1024 -#define SIZE LEN * sizeof(float) -// CHECK: #include -#include - -namespace my { - // user defined function - unsigned int umin(unsigned int arg1, unsigned int arg2) { - return (arg1 < arg2) ? arg1 : arg2; - } - // user defined function - unsigned int umax(unsigned int arg1, unsigned int arg2) { - return (arg1 > arg2) ? 
arg1 : arg2; - } -} - -__global__ void uint_arithm(float* A, float* B, float* C, unsigned int u1, unsigned int u2) -{ - // device function call (warn if unsupported) - unsigned int _umin = umin ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umax = umax ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umin_global = ::umin ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umax_global = ::umax(u1, u2); - if (_umin != _umin_global) return; - if (_umax != _umax_global) return; - int i = threadIdx.x; - A[i] = i + _umin; - B[i] = i + _umax; - C[i] = A[i] + B[i]; -} - -int main() { - unsigned int u1 = 33; - unsigned int u2 = 34; - // user defined function call - unsigned int _min = my::umin(u1, u2); - // user defined function call - unsigned int _max = my::umax(u1, u2); - float *A, *B, *C; - // CHECK: hipMalloc((void**)&A, SIZE); - cudaMalloc((void**)&A, SIZE); - // CHECK: hipMalloc((void**)&B, SIZE); - cudaMalloc((void**)&B, SIZE); - // CHECK: hipMalloc((void**)&C, SIZE); - cudaMalloc((void**)&C, SIZE); - dim3 dimGrid(LEN / 512, 1, 1); - dim3 dimBlock(512, 1, 1); - // CHECK: hipLaunchKernelGGL(uint_arithm, dim3(dimGrid), dim3(dimBlock), 0, 0, A, B, C, u1, u2); - uint_arithm<<>>(A, B, C, u1, u2); - return _min < _max; -} diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_01.cu b/tests/hipify-clang/unit_tests/headers/headers_test_01.cu deleted file mode 100644 index 013d7c17c4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_01.cu +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -#include -#include -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_02.cu b/tests/hipify-clang/unit_tests/headers/headers_test_02.cu deleted file mode 100644 index 957fd16559..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_02.cu +++ 
/dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include "hip/hip_runtime.h" -// CHECK-NOT: #include "cuda_runtime.h" -// CHECK: #include -#include "cuda.h" -#include "cuda_runtime.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_03.cu b/tests/hipify-clang/unit_tests/headers/headers_test_03.cu deleted file mode 100644 index 14735172fb..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_03.cu +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -// CHECK-NOT: #include -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_04.cu b/tests/hipify-clang/unit_tests/headers/headers_test_04.cu deleted file mode 100644 index 10a7daf41c..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_04.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NEXT: #include -// CHECK-NEXT: #include -#include -#include -// CHECK-NOT: #include -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_05.cu b/tests/hipify-clang/unit_tests/headers/headers_test_05.cu deleted file mode 100644 index 4706044b9d..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_05.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -// CHECK-NOT: #include -#include - -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_06.cu b/tests/hipify-clang/unit_tests/headers/headers_test_06.cu deleted file mode 100644 index 1adccd95e4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_06.cu +++ /dev/null @@ -1,8 +0,0 
@@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -#include -#include -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_07.cu b/tests/hipify-clang/unit_tests/headers/headers_test_07.cu deleted file mode 100644 index 1effc189b8..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_07.cu +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" -// CHECK: #include -#include "cublas_v2.h" -#include "cublas.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_08.cu b/tests/hipify-clang/unit_tests/headers/headers_test_08.cu deleted file mode 100644 index aca7f194b0..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_08.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" -// CHECK: #include -#include -#include -#include -#include "cublas_v2.h" -#include "cublas.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_09.cu b/tests/hipify-clang/unit_tests/headers/headers_test_09.cu deleted file mode 100644 index 37e718b5a4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_09.cu +++ /dev/null @@ -1,100 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK: #include - -// CHECK-NOT: #include -// CHECK-NOT: #include - -// CHECK: #include "hip/hip_runtime_api.h" -// CHECK: #include "hip/channel_descriptor.h" -// CHECK: #include "hip/device_functions.h" -// CHECK: #include "hip/driver_types.h" -// CHECK: #include "hip/hip_complex.h" -// CHECK: #include "hip/hip_fp16.h" -// CHECK: #include "hip/hip_texture_types.h" -// CHECK: #include "hip/hip_vector_types.h" - 
-// CHECK: #include - -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" - -// CHECK: #include - -// CHECK: #include "hiprand.h" -// CHECK: #include "hiprand_kernel.h" - -// CHECK: #include - -// CHECK-NOT: #include "hiprand.h" -// CHECK-NOT: #include "hiprand_kernel.h" -// CHECK-NOT: #include "curand_discrete.h" -// CHECK-NOT: #include "curand_discrete2.h" -// CHECK-NOT: #include "curand_globals.h" -// CHECK-NOT: #include "curand_lognormal.h" -// CHECK-NOT: #include "curand_mrg32k3a.h" -// CHECK-NOT: #include "curand_mtgp32.h" -// CHECK-NOT: #include "curand_mtgp32_host.h" -// CHECK-NOT: #include "curand_mtgp32_kernel.h" -// CHECK-NOT: #include "curand_mtgp32dc_p_11213.h" -// CHECK-NOT: #include "curand_normal.h" -// CHECK-NOT: #include "curand_normal_static.h" -// CHECK-NOT: #include "curand_philox4x32_x.h" -// CHECK-NOT: #include "curand_poisson.h" -// CHECK-NOT: #include "curand_precalc.h" -// CHECK-NOT: #include "curand_uniform.h" - -// CHECK: #include - -// CHECK: #include "hipfft.h" -// CHECK: #include "hipsparse.h" - -#include - -#include - -#include - -#include "cuda_runtime_api.h" -#include "channel_descriptor.h" -#include "device_functions.h" -#include "driver_types.h" -#include "cuComplex.h" -#include "cuda_fp16.h" -#include "cuda_texture_types.h" -#include "vector_types.h" - -#include - -#include "cublas_v2.h" -#include "cublas.h" - -#include - -#include "curand.h" -#include "curand_kernel.h" - -#include - -#include "curand_discrete.h" -#include "curand_discrete2.h" -#include "curand_globals.h" -#include "curand_lognormal.h" -#include "curand_mrg32k3a.h" -#include "curand_mtgp32.h" -#include "curand_mtgp32_host.h" -#include "curand_mtgp32_kernel.h" -#include "curand_mtgp32dc_p_11213.h" -#include "curand_normal.h" -#include "curand_normal_static.h" -#include "curand_philox4x32_x.h" -#include "curand_poisson.h" -#include "curand_precalc.h" -#include "curand_uniform.h" - -#include - -#include "cufft.h" - -#include "cusparse.h" diff --git 
a/tests/hipify-clang/unit_tests/headers/headers_test_10.cu b/tests/hipify-clang/unit_tests/headers/headers_test_10.cu deleted file mode 100644 index 1c2db50e3b..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_10.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Checks that HIP header file is included after #pragma once, -// which goes before include guard controlling macro. -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -#ifndef HEADERS_TEST_10_H -// CHECK: #ifndef HEADERS_TEST_10_H -// CHECK-NOT: #include -#define HEADERS_TEST_10_H -#include -static int counter = 0; -#endif // HEADERS_TEST_10_H diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_11.cu b/tests/hipify-clang/unit_tests/headers/headers_test_11.cu deleted file mode 100644 index 7c59dbe4a7..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_11.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Checks that HIP header file is included after include guard controlling macro, -// which goes before #pragma once. -// CHECK: #ifndef HEADERS_TEST_10_H -// CHECK-NEXT: #include -#ifndef HEADERS_TEST_10_H -// CHECK: #pragma once -#pragma once -// CHECK-NOT: #include -#define HEADERS_TEST_10_H -#include -static int counter = 0; -#endif // HEADERS_TEST_10_H diff --git a/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu b/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu deleted file mode 100644 index 3795d3c799..0000000000 --- a/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones. -// ToDo: change lit testing in order to parse the output. 
- -#define LEN 1024 -#define SIZE LEN * sizeof(float) -#define ITER 1024*1024 - -// CHECK: #include -#include - -#define CUDA_LAUNCH(cuda_call,dimGrid,dimBlock, ...) \ - cuda_call<<>>(__VA_ARGS__); - -__global__ void Inc1(float *Ad, float *Bd) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx < 1) { - for (int i = 0; i < ITER; ++i) { - Ad[tx] = Ad[tx] + 1.0f; - for (int j = 0; j < 256; ++j) { - Bd[tx] = Ad[tx]; - } - } - } -} - -int main() { - float *A, *Ad, *Bd; - A = new float[LEN]; - for (int i = 0; i < LEN; ++i) { - A[i] = 0.0f; - } - // CHECK: hipError_t status; - cudaError_t status; - // CHECK: status = hipHostRegister(A, SIZE, hipHostRegisterMapped); - status = cudaHostRegister(A, SIZE, cudaHostRegisterMapped); - // CHECK: hipHostGetDevicePointer(&Ad, A, 0); - cudaHostGetDevicePointer(&Ad, A, 0); - // CHECK: hipMalloc((void**)&Bd, SIZE); - cudaMalloc((void**)&Bd, SIZE); - dim3 dimGrid(LEN / 512, 1, 1); - dim3 dimBlock(512, 1, 1); - - // CHECK: hipLaunchKernelGGL(Inc1, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd); - CUDA_LAUNCH(Inc1, dimGrid, dimBlock, Ad, Bd); -} diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h deleted file mode 100644 index e9437c11f5..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef CAFFE2_CORE_COMMON_CUDNN_H_ -#define CAFFE2_CORE_COMMON_CUDNN_H_ - -#include -#include - -#endif // CAFFE2_CORE_COMMON_CUDNN_H_ diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h deleted file mode 100644 index 7b8a13788a..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ -#define 
CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ - -#include -#include -#include -#include -#include - -namespace caffe2 { - -} // namespace caffe2 - -#endif // CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu deleted file mode 100644 index 3c82045d30..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -// NOTE: Nonworking code just for conversion testing - -// CHECK: #include -#include -#include -#include -// CHECK: #include "caffe2/operators/hip/spatial_batch_norm_op_miopen.hip" -#include "caffe2/operators/spatial_batch_norm_op.h" -// CHECK: #include "caffe2/core/hip/common_miopen.h" -#include "caffe2/core/common_cudnn.h" diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu deleted file mode 100644 index 7f29cfe25c..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -// NOTE: Nonworking code just for conversion testing - -// CHECK: #include -#include -#include -#include -#include - -namespace caffe2 { - -// Operator Definition. 
-struct OperatorDef { - int input = 1; - int output = 2; - int name = 3; -}; - -class OperatorBase; -class Workspace; - -template -class Observable { - public: - Observable() = default; - - Observable(Observable&&) = default; - Observable& operator =(Observable&&) = default; - - virtual ~Observable() = default; -}; - -template -class ObserverBase { - public: - explicit ObserverBase(T* subject) : subject_(subject) {} - - virtual void Start() {} - virtual void Stop() {} - - virtual std::string debugInfo() { - return "Not implemented."; - } - - virtual ~ObserverBase() noexcept {}; - - T* subject() const { - return subject_; - } - - protected: - T* subject_; -}; - -typedef ObserverBase OperatorObserver; - -class OperatorBase : public Observable { - public: - explicit OperatorBase(const OperatorDef& operator_def, Workspace* ws); - virtual ~OperatorBase() noexcept {} -}; - -template -class Operator : public OperatorBase { - public: - explicit Operator(const OperatorDef& operator_def, Workspace* ws) - : OperatorBase(operator_def, ws) { - } - ~Operator() noexcept override {} -}; - -template -class DummyEmptyOp : public Operator { - public: - DummyEmptyOp(const OperatorDef& def, Workspace* ws) - : Operator(def, ws) {} - - bool RunOnDevice() final { return true; } -}; - - -class CUDAContext { -public: - CUDAContext(); - virtual ~CUDAContext() noexcept {} -}; - -#define REGISTER_CUDA_OPERATOR(name, ...) \ - void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ - static void CAFFE_ANONYMOUS_VARIABLE_CUDA##name() { \ - CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ - } - -#define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) 
- -// CHECK: REGISTER_HIP_OPERATOR(Operator, DummyEmptyOp); -REGISTER_CUDA_OPERATOR(Operator, DummyEmptyOp); -// CHECK: REGISTER_HIP_OPERATOR_CREATOR(Operator, DummyEmptyOp); -REGISTER_CUDA_OPERATOR_CREATOR(Operator, DummyEmptyOp); - -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu deleted file mode 100644 index 4646015e74..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu +++ /dev/null @@ -1,60 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -#include - -// TODO: -// using namespace cub; - -template -__global__ void sort(const T* data_in, T* data_out){ - // CHECK: typedef ::hipcub::BlockRadixSort BlockRadixSortT; - typedef ::cub::BlockRadixSort BlockRadixSortT; - __shared__ typename BlockRadixSortT::TempStorage tmp_sort; - double items[4]; - int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x); - for (int i = 0; i < 4; ++i){ - items[i] = data_in[i0 + i]; - } - BlockRadixSortT(tmp_sort).Sort(items); - for (int i = 0; i < 4; ++i){ - data_out[i0 + i] = items[i]; - } -} - -int main(){ - double* d_gpu = NULL; - double* result_gpu = NULL; - double* data_sorted = new double[4096]; - // Allocate memory on the GPU - // CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double)); - cudaMalloc(&d_gpu, 4096 * sizeof(double)); - // CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double)); - cudaMalloc(&result_gpu, 4096 * sizeof(double)); - // CHECK: hiprandGenerator_t gen; - curandGenerator_t gen; - // Create generator - // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); - curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); - // Fill array with random numbers - // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); - curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); - // Destroy generator - // CHECK: hiprandDestroyGenerator(gen); - 
curandDestroyGenerator(gen); - // Sort data - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu); - sort<<<1, 1024>>>(d_gpu, result_gpu); - // CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost); - cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost); - // Write the sorted data to standard out - for (int i = 0; i < 4096; ++i){ - std::cout << data_sorted[i] << ", "; - } - std::cout << std::endl; -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu deleted file mode 100644 index 21898baa03..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu +++ /dev/null @@ -1,69 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -#include - -template -__global__ void sort(const T* data_in, T* data_out){ - // CHECK: typedef hipcub::BlockLoad BlockLoadT; - typedef cub::BlockLoad BlockLoadT; - // CHECK: typedef hipcub::BlockRadixSort BlockRadixSortT; - typedef cub::BlockRadixSort BlockRadixSortT; - // CHECK: typedef hipcub::BlockStore BlockStoreT; - typedef cub::BlockStore BlockStoreT; - __shared__ union { - typename BlockLoadT::TempStorage load; - typename BlockRadixSortT::TempStorage sort; - typename BlockStoreT::TempStorage store; - } tmp_storage; - T items[ITEMS_PER_THREAD]; - BlockLoadT(tmp_storage.load).Load(data_in + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); - __syncthreads(); - BlockRadixSortT(tmp_storage.sort).Sort(items); - __syncthreads(); - BlockStoreT(tmp_storage.store).Store(data_out + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); -} - -int main() { - double* d_gpu = NULL; - double* result_gpu = NULL; - double* data_sorted = new double[1000*4096]; - // Allocate memory on the GPU - // CHECK: hipMalloc(&d_gpu, 1000*4096 * sizeof(double)); - 
cudaMalloc(&d_gpu, 1000*4096 * sizeof(double)); - // CHECK: hipMalloc(&result_gpu, 1000*4096 * sizeof(double)); - cudaMalloc(&result_gpu, 1000*4096 * sizeof(double)); - // CHECK: hiprandGenerator_t gen; - curandGenerator_t gen; - // Create generator - // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); - curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); - // Fill array with random numbers - // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); - curandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); - // Destroy generator - // CHECK: hiprandDestroyGenerator(gen); - curandDestroyGenerator(gen); - // Sort data - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu); - sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu); - sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu); - // CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost); - cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost); - // Write the sorted data to standard out - for (int i = 0; i < 4095; ++i) { - std::cout << data_sorted[i] << ", "; - } - std::cout << data_sorted[4095] << std::endl; -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu deleted file mode 100644 index bc914d419d..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include - -// using namespace hipcub; -using namespace 
cub; - -// Simple CUDA kernel for computing tiled partial sums -template - cub::BlockScanAlgorithm SCAN_ALGO> -__global__ void ScanTilesKernel(int *d_in, int *d_out) { - // Specialize collective types for problem context - // CHECK: typedef ::hipcub::BlockLoad BlockLoadT; - typedef ::cub::BlockLoad BlockLoadT; - typedef BlockScan BlockScanT; - // Allocate on-chip temporary storage - __shared__ union { - typename BlockLoadT::TempStorage load; - typename BlockScanT::TempStorage reduce; - } temp_storage; - // Load data per thread - int thread_data[ITEMS_PER_THREAD]; - int offset = blockIdx.x * (BLOCK_THREADS * ITEMS_PER_THREAD); - BlockLoadT(temp_storage.load).Load(d_in + offset, offset); - __syncthreads(); - // Compute the block-wide prefix sum - BlockScanT(temp_storage).Sum(thread_data); -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu deleted file mode 100644 index 69812c98b0..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -#include -#include -// CHECK: #include "hipblas.h" -#include "cublas.h" -#define M 6 -#define N 5 -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -static __inline__ void modify(float *m, int ldm, int n, int p, int q, float - alpha, float beta) { - // CHECK: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); -} -int main(void) { - int i, j; - // CHECK: hipblasStatus_t stat; - cublasStatus stat; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 0; j < N; j++) { - for (i = 0; i < 
M; i++) { - a[IDX2C(i, j, M)] = (float)(i * M + j + 1); - } - } - // cublasInit is not supported yet - cublasInit(); - // cublasAlloc is not supported yet - stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("device memory allocation failed"); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); - // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - for (j = 0; j < N; j++) { - for (i = 0; i < M; i++) { - printf("%7.0f", a[IDX2C(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu deleted file mode 100644 index 6983140eac..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu +++ /dev/null @@ -1,90 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -#include -// CHECK: 
#include -#include -// CHECK: #include "hipblas.h" -#include "cublas_v2.h" -#define M 6 -#define N 5 -#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) -// CHECK: static __inline__ void modify(hipblasHandle_t handle, float *m, int ldm, int -static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int - n, int p, int q, float alpha, float beta) { - // CHECK: hipblasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - // CHECK: hipblasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); - cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); -} -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: hipblasStatus_t stat; - // CHECK: hipblasHandle_t handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); - } - } - // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); - cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); - // CHECK: if (cudaStat != hipSuccess) { - if (cudaStat != cudaSuccess) { - printf("device memory allocation failed"); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasCreate(&handle); - stat = cublasCreate(&handle); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("CUBLAS initialization failed\n"); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - 
cublasDestroy(handle); - return EXIT_FAILURE; - } - modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); - // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - printf("%7.0f", a[IDX2F(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu deleted file mode 100644 index ecd8fb7eee..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipblas.h" -#include "cublas_v2.h" -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -#define m 6 -#define n 4 -#define k 5 -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: hipblasStatus_t stat; - // CHECK: hipblasHandle_t handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float * a; - float * b; - float * c; - a = (float *)malloc(m*k * sizeof(float)); - b = (float *)malloc(k*n * sizeof(float)); - c = (float *)malloc(m*n * sizeof(float)); - int ind = 11; - for (j = 0; j -#include -#include -#include -// CHECK: #include "rocblas.h" -#include "cublas.h" -#define M 6 -#define N 5 -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -static __inline__ void modify(float *m, int ldm, int n, int p, 
int q, float - alpha, float beta) { - // CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); -} -int main(void) { - int i, j; - // CHECK: rocblas_status stat; - cublasStatus stat; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 0; j < N; j++) { - for (i = 0; i < M; i++) { - a[IDX2C(i, j, M)] = (float)(i * M + j + 1); - } - } - // cublasInit is not supported yet - cublasInit(); - // cublasAlloc is not supported yet - stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("device memory allocation failed"); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); - // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - for (j = 
0; j < N; j++) { - for (i = 0; i < M; i++) { - printf("%7.0f", a[IDX2C(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu deleted file mode 100644 index 0202e2f7fa..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu +++ /dev/null @@ -1,90 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include "rocblas.h" -#include "cublas_v2.h" -#define M 6 -#define N 5 -#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) -// CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int -static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int - n, int p, int q, float alpha, float beta) { - // CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - // CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); - cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); -} -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: rocblas_status stat; - // CHECK: rocblas_handle handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); - } - } - // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); - cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); - // CHECK: if (cudaStat != hipSuccess) { - if (cudaStat != cudaSuccess) { - printf("device memory 
allocation failed"); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_create_handle(&handle); - stat = cublasCreate(&handle); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("CUBLAS initialization failed\n"); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); - // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - printf("%7.0f", a[IDX2F(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu deleted file mode 100644 index 8e35f28f0e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -#include -#include -// CHECK: #include -#include -// CHECK: #include "rocblas.h" 
-#include "cublas_v2.h" -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -#define m 6 -#define n 4 -#define k 5 -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: rocblas_status stat; - // CHECK: rocblas_handle handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float * a; - float * b; - float * c; - a = (float *)malloc(m*k * sizeof(float)); - b = (float *)malloc(k*n * sizeof(float)); - c = (float *)malloc(m*n * sizeof(float)); - int ind = 11; - for (j = 0; j -// CHECK: #include "hip/hip_complex.h" -#include "cuComplex.h" - -#define TYPEFLOAT -#define DIMX 100 -#define DIMY 40 -#define moveX 2 -#define moveY 1 - -#define MAXITERATIONS 10 - -#ifdef TYPEFLOAT -#define TYPE float -// CHECK: #define cTYPE hipFloatComplex -#define cTYPE cuFloatComplex -// CHECK: #define cMakecuComplex(re,i) make_hipFloatComplex(re,i) -#define cMakecuComplex(re,i) make_cuFloatComplex(re,i) -#endif -#ifdef TYPEDOUBLE -// CHECK: #define TYPE hipDoubleComplex -#define TYPE cuDoubleComplex -// CHECK: #define cMakecuComplex(re,i) make_hipDoubleComplex(re,i) -#define cMakecuComplex(re,i) make_cuDoubleComplex(re,i) -#endif - -__device__ cTYPE juliaFunctor(cTYPE p, cTYPE c) { - // CHECK: return hipCaddf(hipCmulf(p, p), c); - return cuCaddf(cuCmulf(p, p), c); -} - -__device__ cTYPE convertToComplex(int x, int y, float zoom) { - TYPE jx = 1.5 * (x - DIMX / 2) / (0.5 * zoom * DIMX) + moveX; - TYPE jy = (y - DIMY / 2) / (0.5 * zoom * DIMY) + moveY; - return cMakecuComplex(jx, jy); -} - -__device__ int evolveComplexPoint(cTYPE p, cTYPE c) { - int it = 1; - // CHECK: while (it <= MAXITERATIONS && hipCabsf(p) <= 4) { - while (it <= MAXITERATIONS && cuCabsf(p) <= 4) { - p = juliaFunctor(p, c); - it++; - } - return it; -} - -__global__ void computeJulia(int* data, cTYPE c, float zoom) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - int j = blockIdx.y * blockDim.y + threadIdx.y; - - if (i -#include -#include -#include - -// CHECK: #include -#include -// 
CHECK: #include "hipDNN.h" -#include "cudnn.h" - -// CHECK: hipError_t err = (f); \ -// CHECK: if (err != hipSuccess) { \ - -#define CUDA_CALL(f) { \ - cudaError_t err = (f); \ - if (err != cudaSuccess) { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ -} -// CHECK: hipdnnStatus_t err = (f); \ -// CHECK: if (err != HIPDNN_STATUS_SUCCESS) { \ - -#define CUDNN_CALL(f) { \ - cudnnStatus_t err = (f); \ - if (err != CUDNN_STATUS_SUCCESS) { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ -} - -__global__ void dev_const(float *px, float k) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - px[tid] = k; -} - -__global__ void dev_iota(float *px) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - px[tid] = tid; -} - -void print(const float *data, int n, int c, int h, int w) { - std::vector buffer(1 << 20); - // CHECK: CUDA_CALL(hipMemcpy( - CUDA_CALL(cudaMemcpy( - buffer.data(), data, - n * c * h * w * sizeof(float), - // CHECK: hipMemcpyDeviceToHost)); - cudaMemcpyDeviceToHost)); - int a = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < c; ++j) { - std::cout << "n=" << i << ", c=" << j << ":" << std::endl; - for (int k = 0; k < h; ++k) { - for (int l = 0; l < w; ++l) { - std::cout << std::setw(4) << std::right << buffer[a]; - ++a; - } - std::cout << std::endl; - } - } - } - std::cout << std::endl; -} - -int main() { - // CHECK: hipdnnHandle_t cudnn; - cudnnHandle_t cudnn; - // CHECK: CUDNN_CALL(hipdnnCreate(&cudnn)); - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - const int in_n = 1; - const int in_c = 1; - const int in_h = 5; - const int in_w = 5; - std::cout << "in_n: " << in_n << std::endl; - std::cout << "in_c: " << in_c << std::endl; - std::cout << "in_h: " << in_h << std::endl; - std::cout << "in_w: " << in_w << std::endl; - std::cout << std::endl; - // CHECK: hipdnnTensorDescriptor_t in_desc; - cudnnTensorDescriptor_t in_desc; - // CHECK: 
CUDNN_CALL(hipdnnCreateTensorDescriptor(&in_desc)); - CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); - // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( - CUDNN_CALL(cudnnSetTensor4dDescriptor( - // CHECK: in_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, - in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, - in_n, in_c, in_h, in_w)); - - float *in_data; - // CHECK: CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &in_data, in_n * in_c * in_h * in_w * sizeof(float))); - - // filter - const int filt_k = 1; - const int filt_c = 1; - const int filt_h = 2; - const int filt_w = 2; - std::cout << "filt_k: " << filt_k << std::endl; - std::cout << "filt_c: " << filt_c << std::endl; - std::cout << "filt_h: " << filt_h << std::endl; - std::cout << "filt_w: " << filt_w << std::endl; - std::cout << std::endl; - - // CHECK: hipdnnFilterDescriptor_t filt_desc; - cudnnFilterDescriptor_t filt_desc; - // CHECK: CUDNN_CALL(hipdnnCreateFilterDescriptor(&filt_desc)); - CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); - // CHECK: CUDNN_CALL(hipdnnSetFilter4dDescriptor( - CUDNN_CALL(cudnnSetFilter4dDescriptor( - // CHECK: filt_desc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW, - filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - filt_k, filt_c, filt_h, filt_w)); - - float *filt_data; - // CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float))); - - // convolution - const int pad_h = 1; - const int pad_w = 1; - const int str_h = 1; - const int str_w = 1; - const int dil_h = 1; - const int dil_w = 1; - std::cout << "pad_h: " << pad_h << std::endl; - std::cout << "pad_w: " << pad_w << std::endl; - std::cout << "str_h: " << str_h << std::endl; - std::cout << "str_w: " << str_w << std::endl; - std::cout << "dil_h: " << dil_h << std::endl; - std::cout << "dil_w: " << dil_w << std::endl; - std::cout << std::endl; - - // CHECK: hipdnnConvolutionDescriptor_t conv_desc; - cudnnConvolutionDescriptor_t conv_desc; - // 
CUDNN_CALL(hipdnnCreateConvolutionDescriptor(&conv_desc)); - CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); - // CHECK: CUDNN_CALL(hipdnnSetConvolution2dDescriptor( - CUDNN_CALL(cudnnSetConvolution2dDescriptor( - conv_desc, - pad_h, pad_w, str_h, str_w, dil_h, dil_w, - // CHECK: HIPDNN_CONVOLUTION, HIPDNN_DATA_FLOAT)); - CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT)); - - // output - int out_n; - int out_c; - int out_h; - int out_w; - - // CHECK: CUDNN_CALL(hipdnnGetConvolution2dForwardOutputDim( - CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( - conv_desc, in_desc, filt_desc, - &out_n, &out_c, &out_h, &out_w)); - - std::cout << "out_n: " << out_n << std::endl; - std::cout << "out_c: " << out_c << std::endl; - std::cout << "out_h: " << out_h << std::endl; - std::cout << "out_w: " << out_w << std::endl; - std::cout << std::endl; - // CHECK: hipdnnTensorDescriptor_t out_desc; - cudnnTensorDescriptor_t out_desc; - // CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&out_desc)); - CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); - // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( - CUDNN_CALL(cudnnSetTensor4dDescriptor( - // CHECK: out_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, - out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, - out_n, out_c, out_h, out_w)); - - - cudnnDataType_t *dataType = nullptr; - cudnnTensorFormat_t *tensorFormat = nullptr; - int *p_filt_k = nullptr; - int *p_filt_c = nullptr; - int *p_filt_h = nullptr; - int *p_filt_w = nullptr; - - // CHECK: CUDNN_CALL(hipdnnGetFilter4dDescriptor( - CUDNN_CALL(cudnnGetFilter4dDescriptor( - filt_desc, dataType, tensorFormat, - p_filt_k, p_filt_c, p_filt_h, p_filt_w)); - - float *out_data; - // CHECK: CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &out_data, out_n * out_c * out_h * out_w * sizeof(float))); - - // algorithm - // CHECK: hipdnnConvolutionFwdAlgo_t algo; - cudnnConvolutionFwdAlgo_t algo; - // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardAlgorithm( - 
CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( - cudnn, - in_desc, filt_desc, conv_desc, out_desc, - // CHECK: HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - - std::cout << "Convolution algorithm: " << algo << std::endl; - std::cout << std::endl; - - // workspace - size_t ws_size; - // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardWorkspaceSize( - CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( - cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); - - float *ws_data; - // CHECK: CUDA_CALL(hipMalloc(&ws_data, ws_size)); - CUDA_CALL(cudaMalloc(&ws_data, ws_size)); - - std::cout << "Workspace size: " << ws_size << std::endl; - std::cout << std::endl; - - // perform - float alpha = 1.f; - float beta = 0.f; - // CHECK: hipLaunchKernelGGL(dev_iota, dim3(in_w * in_h), dim3(in_n * in_c), 0, 0, in_data); - // CHECK: hipLaunchKernelGGL(dev_const, dim3(filt_w * filt_h), dim3(filt_k * filt_c), 0, 0, filt_data, 1.f); - dev_iota<<>>(in_data); - dev_const<<>>(filt_data, 1.f); - // CHECK: CUDNN_CALL(hipdnnConvolutionForward( - CUDNN_CALL(cudnnConvolutionForward( - cudnn, - &alpha, in_desc, in_data, filt_desc, filt_data, - conv_desc, algo, ws_data, ws_size, - &beta, out_desc, out_data)); - - // results - std::cout << "in_data:" << std::endl; - print(in_data, in_n, in_c, in_h, in_w); - - std::cout << "filt_data:" << std::endl; - print(filt_data, filt_k, filt_c, filt_h, filt_w); - - std::cout << "out_data:" << std::endl; - print(out_data, out_n, out_c, out_h, out_w); - - // finalizing - // CHECK: CUDA_CALL(hipFree(ws_data)); - CUDA_CALL(cudaFree(ws_data)); - // CHECK: CUDA_CALL(hipFree(out_data)); - CUDA_CALL(cudaFree(out_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(out_desc)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); - // CHECK: CUDNN_CALL(hipdnnDestroyConvolutionDescriptor(conv_desc)); - CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); - // CHECK: 
CUDA_CALL(hipFree(filt_data)); - CUDA_CALL(cudaFree(filt_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyFilterDescriptor(filt_desc)); - CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); - // CHECK: CUDA_CALL(hipFree(in_data)); - CUDA_CALL(cudaFree(in_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(in_desc)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); - // CHECK: CUDNN_CALL(hipdnnDestroy(cudnn)); - CUDNN_CALL(cudnnDestroy(cudnn)); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu b/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu deleted file mode 100644 index a2052b3de6..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu +++ /dev/null @@ -1,159 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -// CHECK: #include -#include - -/** - * - * Author: Jon Gauthier - * February 2015 - * -. * Adopted for CUDA/CUDNN 9.0 - */ - -void printMatrix(const double *mat, int m, int n) { - for (int j = 0; j < n; j++) { - for (int i = 0; i < m; i++) { - printf("%f\n", mat[j * m + i]); - } - printf("\n\n"); - } -} - -double *makeDiffData(int m, int c) { - double *diff = (double *) calloc(m * c, sizeof(double)); - for (int j = 0; j < m; j++) { - int class_ = rand() % c; - printf("%d class: %d\n", j, class_); - for (int i = 0; i < c; i++) - diff[j * c + i] = class_ == i ? 
-c / (double) m : 0; - } - - return diff; -} - -int main() { - int m = 5, c = 4, numChannels = 1; - - double *fcLayer = (double *) malloc(m * c * sizeof(double)); - for (int i = 0; i < m; i++) { - double def = rand() % 25; - for (int c_idx = 0; c_idx < c; c_idx++) { - int offset = i * c + c_idx; - fcLayer[offset] = def; - } - } - printf("FC LAYER:\n"); - printMatrix(fcLayer, c, m); - - double *d_fcLayer; - // CHECK: hipMalloc((void**) &d_fcLayer, m * c * sizeof(double)); - cudaMalloc((void**) &d_fcLayer, m * c * sizeof(double)); - // CHECK: hipMemcpy(d_fcLayer, fcLayer, m * c * sizeof(double), hipMemcpyHostToDevice); - cudaMemcpy(d_fcLayer, fcLayer, m * c * sizeof(double), cudaMemcpyHostToDevice); - - double *d_softmaxData; - // CHECK: hipMalloc((void**) &d_softmaxData, m * c * sizeof(double)); - cudaMalloc((void**) &d_softmaxData, m * c * sizeof(double)); - - // CHECK: hipdnnHandle_t handle; - cudnnHandle_t handle; - // CHECK: hipdnnCreate(&handle); - cudnnCreate(&handle); - - float one = 1; - float zero = 0; - - // softmaxForward(n, c, h, w, dstData, &srcData); - // CHECK: hipdnnTensorDescriptor_t srcTensorDesc, sftTensorDesc; - // CHECK: hipdnnCreateTensorDescriptor(&srcTensorDesc); - // CHECK: hipdnnCreateTensorDescriptor(&sftTensorDesc); - cudnnTensorDescriptor_t srcTensorDesc, sftTensorDesc; - cudnnCreateTensorDescriptor(&srcTensorDesc); - cudnnCreateTensorDescriptor(&sftTensorDesc); - // CHECK: hipdnnSetTensor4dDescriptor(srcTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnSetTensor4dDescriptor(srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - // CHECK: hipdnnSetTensor4dDescriptor(sftTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnSetTensor4dDescriptor(sftTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - // CHECK: hipdnnSoftmaxForward(handle, HIPDNN_SOFTMAX_ACCURATE, HIPDNN_SOFTMAX_MODE_CHANNEL, &one, - cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, &one, - 
srcTensorDesc, d_fcLayer, &zero, sftTensorDesc, d_softmaxData); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // Copy back - double *result = (double *) malloc(m * c * sizeof(double)); - // CHECK: hipMemcpy(result, d_softmaxData, m * c * sizeof(double), hipMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaMemcpy(result, d_softmaxData, m * c * sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - // Log - printf("SOFTMAX:\n"); - printMatrix(result, c, m); - - // Try backward - // CHECK: hipdnnTensorDescriptor_t diffTensorDesc; - // CHECK: hipdnnCreateTensorDescriptor(&diffTensorDesc); - // CHECK: hipdnnSetTensor4dDescriptor(diffTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnTensorDescriptor_t diffTensorDesc; - cudnnCreateTensorDescriptor(&diffTensorDesc); - cudnnSetTensor4dDescriptor(diffTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - - double *d_gradData; - // CHECK: hipMalloc((void**) &d_gradData, m * c * sizeof(double)); - cudaMalloc((void**) &d_gradData, m * c * sizeof(double)); - - double *diffData = makeDiffData(m, c); - double *d_diffData; - // CHECK: hipMalloc((void**) &d_diffData, m * c * sizeof(double)); - // CHECK: hipMemcpy(d_diffData, diffData, m * c * sizeof(double), hipMemcpyHostToDevice); - // CHECK: hipDeviceSynchronize(); - cudaMalloc((void**) &d_diffData, m * c * sizeof(double)); - cudaMemcpy(d_diffData, diffData, m * c * sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - // CHECK: hipdnnSoftmaxBackward(handle, HIPDNN_SOFTMAX_ACCURATE, HIPDNN_SOFTMAX_MODE_CHANNEL, - cudnnSoftmaxBackward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, - &one, srcTensorDesc, d_softmaxData, diffTensorDesc, d_diffData, &zero, sftTensorDesc, d_gradData); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // Copy back - double *result_backward = (double *) malloc(m * c * sizeof(double)); - // CHECK: hipMemcpy(result_backward, d_gradData, m * 
c * sizeof(double), hipMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaMemcpy(result_backward, d_gradData, m * c * sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - // Log - printf("GRADIENT:\n"); - printMatrix(result_backward, c, m); - - // Destruct - free(result); - free(diffData); - free(result_backward); - free(fcLayer); - - // CHECK: hipdnnDestroyTensorDescriptor(srcTensorDesc); - // CHECK: hipdnnDestroyTensorDescriptor(sftTensorDesc); - // CHECK: hipdnnDestroyTensorDescriptor(diffTensorDesc); - // CHECK: hipFree(d_fcLayer); - // CHECK: hipFree(d_softmaxData); - // CHECK: hipFree(d_gradData); - // CHECK: hipFree(d_diffData); - // CHECK: hipdnnDestroy(handle); - cudnnDestroyTensorDescriptor(srcTensorDesc); - cudnnDestroyTensorDescriptor(sftTensorDesc); - cudnnDestroyTensorDescriptor(diffTensorDesc); - cudaFree(d_fcLayer); - cudaFree(d_softmaxData); - cudaFree(d_gradData); - cudaFree(d_diffData); - cudnnDestroy(handle); -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu b/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu deleted file mode 100644 index 9c05a53fa8..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu +++ /dev/null @@ -1,78 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -// CHECK: #include -#include -#include -#include - -#define DATASIZE 8 -#define BATCH 2 - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -// CHECK: inline void gpuAssert(hipError_t code, const char *file, int line, bool abort = true) -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) -{ - // CHECK: if (code != hipSuccess) - if (code != cudaSuccess) - { - // CHECK: fprintf(stderr, "GPUassert: %s %s %dn", hipGetErrorString(code), file, line); - fprintf(stderr, "GPUassert: %s %s %dn", cudaGetErrorString(code), file, line); - if (abort) exit(code); - 
} -} - -int main() -{ - // --- Host side input data allocation and initialization - // CHECK: hipfftReal *hostInputData = (hipfftReal*)malloc(DATASIZE*BATCH * sizeof(hipfftReal)); - cufftReal *hostInputData = (cufftReal*)malloc(DATASIZE*BATCH * sizeof(cufftReal)); - for (int i = 0; iHost copy of the results - // CHECK: gpuErrchk(hipMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(hipfftComplex), hipMemcpyDeviceToHost)); - gpuErrchk(cudaMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); - - for (int i = 0; i -#include -#include -#include -#include -#include -#include -#include - -#include "cmdparser.hpp" -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: if ((x) != hipSuccess) { -#define CUDA_CALL(x) \ - do { \ - if ((x) != cudaSuccess) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) -// CHECK: if ((x) != HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) \ - do { \ - if ((x) != CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#ifndef DEFAULT_RAND_N -const size_t DEFAULT_RAND_N = 1024 * 1024 * 128; -#endif - -// CHECK: typedef hiprandRngType_t rng_type_t; -typedef curandRngType rng_type_t; - -template -// CHECK: using generate_func_type = std::function; -using generate_func_type = std::function; - -template -void run_benchmark(const cli::Parser& parser, const rng_type_t rng_type, - generate_func_type generate_func) { - const size_t size = parser.get("size"); - const size_t trials = parser.get("trials"); - - T* data; - // CHECK: CUDA_CALL(hipMalloc((void**)&data, size * sizeof(T))); - CUDA_CALL(cudaMalloc((void**)&data, size * sizeof(T))); - - // CHECK: hiprandGenerator_t generator; - // CHECK: CURAND_CALL(hiprandCreateGenerator(&generator, rng_type)); - curandGenerator_t generator; - 
CURAND_CALL(curandCreateGenerator(&generator, rng_type)); - - const size_t dimensions = parser.get("dimensions"); - // CHECK: hiprandStatus_t status = hiprandSetQuasiRandomGeneratorDimensions(generator, dimensions); - // CHECK: if (status != HIPRAND_STATUS_TYPE_ERROR) - curandStatus_t status = curandSetQuasiRandomGeneratorDimensions(generator, dimensions); - if (status != CURAND_STATUS_TYPE_ERROR) // If the RNG is not quasi-random - { - CURAND_CALL(status); - } - - // Warm-up - for (size_t i = 0; i < 5; i++) { - CURAND_CALL(generate_func(generator, data, size)); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - - // Measurement - auto start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < trials; i++) { - CURAND_CALL(generate_func(generator, data, size)); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - - std::cout << std::fixed << std::setprecision(3) << " " - << "Throughput = " << std::setw(8) - << (trials * size * sizeof(T)) / (elapsed.count() / 1e3 * (1 << 30)) - << " GB/s, Samples = " << std::setw(8) - << (trials * size) / (elapsed.count() / 1e3 * (1 << 30)) - << " GSample/s, AvgTime (1 trial) = " << std::setw(8) << elapsed.count() / trials - << " ms, Time (all) = " << std::setw(8) << elapsed.count() << " ms, Size = " << size - << std::endl; - // CHECK: CURAND_CALL(hiprandDestroyGenerator(generator)); - // CHECK: CUDA_CALL(hipFree(data)); - CURAND_CALL(curandDestroyGenerator(generator)); - CUDA_CALL(cudaFree(data)); -} - -void run_benchmarks(const cli::Parser& parser, const rng_type_t rng_type, - const std::string& distribution) { - if (distribution == "uniform-uint") { - // CHECK: if (rng_type != HIPRAND_RNG_QUASI_SOBOL64 && - // CHECK: rng_type != HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - if (rng_type != CURAND_RNG_QUASI_SOBOL64 && - rng_type != 
CURAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, unsigned int* data, size_t size) { - // CHECK: return hiprandGenerate(gen, data, size); - [](curandGenerator_t gen, unsigned int* data, size_t size) { - return curandGenerate(gen, data, size); - }); - } - } - if (distribution == "uniform-long-long") { - // CHECK: if (rng_type == HIPRAND_RNG_QUASI_SOBOL64 || - // CHECK: rng_type == HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64) - if (rng_type == CURAND_RNG_QUASI_SOBOL64 || - rng_type == CURAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, unsigned long long* data, size_t size) { - [](curandGenerator_t gen, unsigned long long* data, size_t size) { - // curandGenerateLongLong is yet unsupported by HIP - // CHECK-NOT: return hiprandGenerateLongLong(gen, data, size); - return curandGenerateLongLong(gen, data, size); - }); - } - } - if (distribution == "uniform-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateUniform(gen, data, size); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateUniform(gen, data, size); - }); - } - if (distribution == "uniform-double") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateUniformDouble(gen, data, size); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateUniformDouble(gen, data, size); - }); - } - if (distribution == "normal-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateNormal(gen, data, size, 0.0f, 1.0f); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateNormal(gen, data, size, 0.0f, 1.0f); - }); - } - if (distribution == "normal-double") { - run_benchmark( - 
parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateNormalDouble(gen, data, size, 0.0, 1.0); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateNormalDouble(gen, data, size, 0.0, 1.0); - }); - } - if (distribution == "log-normal-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateLogNormal(gen, data, size, 0.0f, 1.0f); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateLogNormal(gen, data, size, 0.0f, 1.0f); - }); - } - if (distribution == "log-normal-double") { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateLogNormalDouble(gen, data, size, 0.0, 1.0); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateLogNormalDouble(gen, data, size, 0.0, 1.0); - }); - } - if (distribution == "poisson") { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) { - std::cout << " " - << "lambda " << std::fixed << std::setprecision(1) << lambda << std::endl; - run_benchmark( - parser, rng_type, - // CHECK: [lambda](hiprandGenerator_t gen, unsigned int* data, size_t size) { - // CHECK: return hiprandGeneratePoisson(gen, data, size, lambda); - [lambda](curandGenerator_t gen, unsigned int* data, size_t size) { - return curandGeneratePoisson(gen, data, size, lambda); - }); - } - } -} - -const std::vector all_engines = { - "xorwow", "mrg32k3a", "mtgp32", - // "mt19937", - "philox", "sobol32", - // "scrambled_sobol32", - // "sobol64", - // "scrambled_sobol64", -}; - -const std::vector all_distributions = { - "uniform-uint", "uniform-long-long", "uniform-float", "uniform-double", "normal-float", - "normal-double", "log-normal-float", "log-normal-double", "poisson"}; - -int main(int argc, char* argv[]) { - cli::Parser parser(argc, argv); 
- - const std::string distribution_desc = - "space-separated list of distributions:" + - std::accumulate(all_distributions.begin(), all_distributions.end(), std::string(), - [](std::string a, std::string b) { return a + "\n " + b; }) + - "\n or all"; - const std::string engine_desc = - "space-separated list of random number engines:" + - std::accumulate(all_engines.begin(), all_engines.end(), std::string(), - [](std::string a, std::string b) { return a + "\n " + b; }) + - "\n or all"; - - parser.set_optional("size", "size", DEFAULT_RAND_N, "number of values"); - parser.set_optional("dimensions", "dimensions", 1, - "number of dimensions of quasi-random values"); - parser.set_optional("trials", "trials", 20, "number of trials"); - parser.set_optional>("dis", "dis", {"uniform-uint"}, - distribution_desc.c_str()); - parser.set_optional>("engine", "engine", {"philox"}, - engine_desc.c_str()); - parser.set_optional>( - "lambda", "lambda", {10.0}, "space-separated list of lambdas of Poisson distribution"); - parser.run_and_exit_if_error(); - - std::vector engines; - { - auto es = parser.get>("engine"); - if (std::find(es.begin(), es.end(), "all") != es.end()) { - engines = all_engines; - } else { - for (auto e : all_engines) { - if (std::find(es.begin(), es.end(), e) != es.end()) engines.push_back(e); - } - } - } - - std::vector distributions; - { - auto ds = parser.get>("dis"); - if (std::find(ds.begin(), ds.end(), "all") != ds.end()) { - distributions = all_distributions; - } else { - for (auto d : all_distributions) { - if (std::find(ds.begin(), ds.end(), d) != ds.end()) distributions.push_back(d); - } - } - } - - int version; - // CHECK: CURAND_CALL(hiprandGetVersion(&version)); - CURAND_CALL(curandGetVersion(&version)); - int runtime_version; - // cudaRuntimeGetVersion is yet unsupported by HIP - // CHECK: CUDA_CALL(hipRuntimeGetVersion(&runtime_version)); - CUDA_CALL(cudaRuntimeGetVersion(&runtime_version)); - int device_id; - // CHECK: 
CUDA_CALL(hipGetDevice(&device_id)); - // CHECK: hipDeviceProp_t props; - // CHECK: CUDA_CALL(hipGetDeviceProperties(&props, device_id)); - CUDA_CALL(cudaGetDevice(&device_id)); - cudaDeviceProp props; - CUDA_CALL(cudaGetDeviceProperties(&props, device_id)); - - std::cout << "cuRAND: " << version << " "; - std::cout << "Runtime: " << runtime_version << " "; - std::cout << "Device: " << props.name; - std::cout << std::endl << std::endl; - - for (auto engine : engines) { - // CHECK: rng_type_t rng_type = HIPRAND_RNG_PSEUDO_XORWOW; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_XORWOW; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MRG32K3A; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MTGP32; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MT19937; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SOBOL32; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SOBOL64; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64; - rng_type_t rng_type = CURAND_RNG_PSEUDO_XORWOW; - if (engine == "xorwow") - rng_type = CURAND_RNG_PSEUDO_XORWOW; - else if (engine == "mrg32k3a") - rng_type = CURAND_RNG_PSEUDO_MRG32K3A; - else if (engine == "mtgp32") - rng_type = CURAND_RNG_PSEUDO_MTGP32; - else if (engine == "mt19937") - rng_type = CURAND_RNG_PSEUDO_MT19937; - else if (engine == "philox") - rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10; - else if (engine == "sobol32") - rng_type = CURAND_RNG_QUASI_SOBOL32; - else if (engine == "scrambled_sobol32") - rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32; - else if (engine == "sobol64") - rng_type = CURAND_RNG_QUASI_SOBOL64; - else if (engine == "scrambled_sobol64") - rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64; - else { - std::cout << "Wrong engine name" << std::endl; - exit(1); - } - - std::cout << engine << ":" << std::endl; - - for (auto distribution : distributions) { - std::cout << " " << distribution << ":" << std::endl; - run_benchmarks(parser, 
rng_type, distribution); - } - std::cout << std::endl; - } - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp deleted file mode 100644 index bff9b77cad..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp +++ /dev/null @@ -1,673 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cmdparser.hpp" -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: if ((x) != hipSuccess) { -#define CUDA_CALL(x) \ - do { \ - if ((x) != cudaSuccess) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) -// CHECK: if ((x) != HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) \ - do { \ - if ((x) != CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#ifndef DEFAULT_RAND_N -const size_t DEFAULT_RAND_N = 1024 * 1024 * 128; -#endif - -size_t next_power2(size_t x) -{ - size_t power = 1; - while (power < x) - { - power *= 2; - } - return power; -} - -template -__global__ -void init_kernel(GeneratorState * states, - const unsigned long long seed, - const unsigned long long offset) -{ - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - GeneratorState state; - // CHECK: hiprand_init(seed, state_id, offset, &state); - curand_init(seed, state_id, offset, &state); - states[state_id] = state; -} - -template -__global__ -void generate_kernel(GeneratorState * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int stride = gridDim.x * blockDim.x; - - GeneratorState state = states[state_id]; - unsigned int index = state_id; - while(index < size) - { - data[index] = generate_func(&state, extra); - index += stride; - } - states[state_id] = state; -} - -template -struct runner -{ - GeneratorState * states; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - const size_t states_size = blocks * 
threads; - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(GeneratorState))); - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(GeneratorState))); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset); - init_kernel<<>>(states, seed, offset); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - } - - ~runner() - { - CUDA_CALL(cudaFree(states)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra); - generate_kernel<<>>(states, data, size, generate_func, extra); - } -}; - -// CHECK: void generate_kernel(hiprandStateMtgp32_t * states, -template -__global__ -void generate_kernel(curandStateMtgp32_t * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int state_id = blockIdx.x; - const unsigned int thread_id = threadIdx.x; - unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int stride = gridDim.x * blockDim.x; - // CHECK: __shared__ hiprandStateMtgp32_t state; - __shared__ curandStateMtgp32_t state; - - if (thread_id == 0) - state = states[state_id]; - __syncthreads(); - - const size_t r = size%blockDim.x; - const size_t size_rounded_up = r == 0 ? 
size : size + (blockDim.x - r); - while(index < size_rounded_up) - { - auto value = generate_func(&state, extra); - if(index < size) - data[index] = value; - index += stride; - } - __syncthreads(); - - if (thread_id == 0) - states[state_id] = state; -} - -// CHECK: struct runner -template<> -struct runner -{ - // CHECK: hiprandStateMtgp32_t * states; - curandStateMtgp32_t * states; - mtgp32_kernel_params_t * d_param; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - const size_t states_size = std::min((size_t)200, blocks); - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(hiprandStateMtgp32_t))); - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(curandStateMtgp32_t))); - // CHECK: CUDA_CALL(hipMalloc((void **)&d_param, sizeof(mtgp32_kernel_params))); - CUDA_CALL(cudaMalloc((void **)&d_param, sizeof(mtgp32_kernel_params))); - // CHECK: CURAND_CALL(hiprandMakeMTGP32Constants(mtgp32dc_params_fast_11213, d_param)); - CURAND_CALL(curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, d_param)); - // CHECK: CURAND_CALL(hiprandMakeMTGP32KernelState(states, mtgp32dc_params_fast_11213, d_param, states_size, seed)); - CURAND_CALL(curandMakeMTGP32KernelState(states, mtgp32dc_params_fast_11213, d_param, states_size, seed)); - } - - ~runner() - { - // CHECK: CUDA_CALL(hipFree(states)); - // CHECK: CUDA_CALL(hipFree(d_param)); - CUDA_CALL(cudaFree(states)); - CUDA_CALL(cudaFree(d_param)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra); - generate_kernel<<>>(states, data, size, generate_func, extra); - } -}; - -// CHECK: void init_kernel(hiprandStateSobol32_t * 
states, -template -__global__ -void init_kernel(curandStateSobol32_t * states, - const Directions directions, - const unsigned long long offset) -{ - const unsigned int dimension = blockIdx.y; - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - // CHECK: hiprandStateSobol32_t state; - // CHECK: hiprand_init(directions[dimension], offset + state_id, &state); - curandStateSobol32_t state; - curand_init(directions[dimension], offset + state_id, &state); - states[gridDim.x * blockDim.x * dimension + state_id] = state; -} - -// CHECK: void generate_kernel(hiprandStateSobol32_t * states, -template -__global__ -void generate_kernel(curandStateSobol32_t * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int dimension = blockIdx.y; - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int stride = gridDim.x * blockDim.x; - // CHECK: hiprandStateSobol32_t state = states[gridDim.x * blockDim.x * dimension + state_id]; - curandStateSobol32_t state = states[gridDim.x * blockDim.x * dimension + state_id]; - const unsigned int offset = dimension * size; - unsigned int index = state_id; - while(index < size) - { - data[offset + index] = generate_func(&state, extra); - skipahead(stride - 1, &state); - index += stride; - } - state = states[gridDim.x * blockDim.x * dimension + state_id]; - skipahead(static_cast(size), &state); - states[gridDim.x * blockDim.x * dimension + state_id] = state; -} - -// CHECK: struct runner -template<> -struct runner -{ - // CHECK: hiprandStateSobol32_t * states; - curandStateSobol32_t * states; - size_t dimensions; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - this->dimensions = dimensions; - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(hiprandStateSobol32_t))); - const size_t states_size = blocks 
* threads * dimensions; - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(curandStateSobol32_t))); - // CHECK: hiprandDirectionVectors32_t * directions; - curandDirectionVectors32_t * directions; - // CHECK: const size_t size = dimensions * sizeof(hiprandDirectionVectors32_t); - const size_t size = dimensions * sizeof(curandDirectionVectors32_t); - // CHECK: CUDA_CALL(hipMalloc((void **)&directions, size)); - CUDA_CALL(cudaMalloc((void **)&directions, size)); - // CHECK: hiprandDirectionVectors32_t * h_directions; - curandDirectionVectors32_t * h_directions; - // hiprandGetDirectionVectors32 and HIPRAND_DIRECTION_VECTORS_32_JOEKUO6 (of hiprandDirectionVectorSet_t) are yet unsupported by HIP - // CHECK-NOT: CURAND_CALL(hiprandGetDirectionVectors32(&h_directions, HIPRAND_DIRECTION_VECTORS_32_JOEKUO6)); - CURAND_CALL(curandGetDirectionVectors32(&h_directions, CURAND_DIRECTION_VECTORS_32_JOEKUO6)); - // CHECK: CUDA_CALL(hipMemcpy(directions, h_directions, size, hipMemcpyHostToDevice)); - CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice)); - - const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset); - init_kernel<<>>(states, directions, offset); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - // CHECK: CUDA_CALL(hipFree(directions)); - CUDA_CALL(cudaFree(directions)); - } - - ~runner() - { - // CHECK: CUDA_CALL(hipFree(states)); - CUDA_CALL(cudaFree(states)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: 
hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); - generate_kernel<<>>(states, data, size / dimensions, generate_func, extra); - } -}; - -template -void run_benchmark(const cli::Parser& parser, - const GenerateFunc& generate_func, - const Extra extra) -{ - const size_t size = parser.get("size"); - const size_t dimensions = parser.get("dimensions"); - const size_t trials = parser.get("trials"); - - const size_t blocks = parser.get("blocks"); - const size_t threads = parser.get("threads"); - - T * data; - // CHECK: CUDA_CALL(hipMalloc((void **)&data, size * sizeof(T))); - CUDA_CALL(cudaMalloc((void **)&data, size * sizeof(T))); - - runner r(dimensions, blocks, threads, 12345ULL, 6789ULL); - - // Warm-up - for (size_t i = 0; i < 5; i++) - { - r.generate(blocks, threads, data, size, generate_func, extra); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - - // Measurement - auto start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < trials; i++) - { - r.generate(blocks, threads, data, size, generate_func, extra); - } - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - - std::cout << std::fixed << std::setprecision(3) - << " " - << "Throughput = " - << std::setw(8) << (trials * size * sizeof(T)) / - (elapsed.count() / 1e3 * (1 << 30)) - << " GB/s, Samples = " - << std::setw(8) << (trials * size) / - (elapsed.count() / 1e3 * (1 << 30)) - << " GSample/s, AvgTime (1 trial) = " - << std::setw(8) << elapsed.count() / trials - << " 
ms, Time (all) = " - << std::setw(8) << elapsed.count() - << " ms, Size = " << size - << std::endl; - // CHECK: CUDA_CALL(hipFree(data)); - CUDA_CALL(cudaFree(data)); -} - -template -void run_benchmarks(const cli::Parser& parser, - const std::string& distribution) -{ - if (distribution == "uniform-uint") - { - // curandStateSobol64_t and curandStateScrambledSobol64_t are yet unsupported by HIP - // CHECK-NOT: if (!std::is_same::value && - // CHECK-NOT: !std::is_same::value) - if (!std::is_same::value && - !std::is_same::value) - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand(state); - return curand(state); - }, 0 - ); - } - } - if (distribution == "uniform-long-long") - { - // curandStateSobol64_t and curandStateScrambledSobol64_t are yet unsupported by HIP - // CHECK-NOT: if (!std::is_same::value && - // CHECK-NOT: !std::is_same::value) - if (std::is_same::value || - std::is_same::value) - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand(state); - return curand(state); - }, 0 - ); - } - } - if (distribution == "uniform-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_uniform(state); - return curand_uniform(state); - }, 0 - ); - } - if (distribution == "uniform-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_uniform_double(state); - return curand_uniform_double(state); - }, 0 - ); - } - if (distribution == "normal-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_normal(state); - return curand_normal(state); - }, 0 - ); - } - if (distribution == "normal-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_normal_double(state); - return curand_normal_double(state); - }, 0 - ); - } - if (distribution == 
"log-normal-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_log_normal(state, 0.0f, 1.0f); - return curand_log_normal(state, 0.0f, 1.0f); - }, 0 - ); - } - if (distribution == "log-normal-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_log_normal_double(state, 0.0, 1.0); - return curand_log_normal_double(state, 0.0, 1.0); - }, 0 - ); - } - if (distribution == "poisson") - { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) - { - std::cout << " " << "lambda " - << std::fixed << std::setprecision(1) << lambda << std::endl; - run_benchmark(parser, - [] __device__ (GeneratorState * state, double lambda) { - // CHECK: return hiprand_poisson(state, lambda); - return curand_poisson(state, lambda); - }, lambda - ); - } - } - if (distribution == "discrete-poisson") - { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) - { - std::cout << " " << "lambda " - << std::fixed << std::setprecision(1) << lambda << std::endl; - // CHECK: hiprandDiscreteDistribution_t discrete_distribution; - curandDiscreteDistribution_t discrete_distribution; - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(lambda, &discrete_distribution)); - CURAND_CALL(curandCreatePoissonDistribution(lambda, &discrete_distribution)); - run_benchmark(parser, - // CHECK: [] __device__ (GeneratorState * state, hiprandDiscreteDistribution_t discrete_distribution) { - [] __device__ (GeneratorState * state, curandDiscreteDistribution_t discrete_distribution) { - // CHECK: return hiprand_discrete(state, discrete_distribution); - return curand_discrete(state, discrete_distribution); - }, discrete_distribution - ); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(discrete_distribution)); - CURAND_CALL(curandDestroyDistribution(discrete_distribution)); - } - } -} - -const std::vector all_engines = { - "xorwow", - "mrg32k3a", - 
"mtgp32", - // "mt19937", - "philox", - "sobol32", - // "scrambled_sobol32", - // "sobol64", - // "scrambled_sobol64", -}; - -const std::vector all_distributions = { - "uniform-uint", - // "uniform-long-long", - "uniform-float", - "uniform-double", - "normal-float", - "normal-double", - "log-normal-float", - "log-normal-double", - "poisson", - "discrete-poisson", -}; - -int main(int argc, char *argv[]) -{ - cli::Parser parser(argc, argv); - - const std::string distribution_desc = - "space-separated list of distributions:" + - std::accumulate(all_distributions.begin(), all_distributions.end(), std::string(), - [](std::string a, std::string b) { - return a + "\n " + b; - } - ) + - "\n or all"; - const std::string engine_desc = - "space-separated list of random number engines:" + - std::accumulate(all_engines.begin(), all_engines.end(), std::string(), - [](std::string a, std::string b) { - return a + "\n " + b; - } - ) + - "\n or all"; - - parser.set_optional("size", "size", DEFAULT_RAND_N, "number of values"); - parser.set_optional("dimensions", "dimensions", 1, "number of dimensions of quasi-random values"); - parser.set_optional("trials", "trials", 20, "number of trials"); - parser.set_optional("blocks", "blocks", 256, "number of blocks"); - parser.set_optional("threads", "threads", 256, "number of threads in each block"); - parser.set_optional>("dis", "dis", {"uniform-uint"}, distribution_desc.c_str()); - parser.set_optional>("engine", "engine", {"philox"}, engine_desc.c_str()); - parser.set_optional>("lambda", "lambda", {10.0}, "space-separated list of lambdas of Poisson distribution"); - parser.run_and_exit_if_error(); - - std::vector engines; - { - auto es = parser.get>("engine"); - if (std::find(es.begin(), es.end(), "all") != es.end()) - { - engines = all_engines; - } - else - { - for (auto e : all_engines) - { - if (std::find(es.begin(), es.end(), e) != es.end()) - engines.push_back(e); - } - } - } - - std::vector distributions; - { - auto ds = 
parser.get>("dis"); - if (std::find(ds.begin(), ds.end(), "all") != ds.end()) - { - distributions = all_distributions; - } - else - { - for (auto d : all_distributions) - { - if (std::find(ds.begin(), ds.end(), d) != ds.end()) - distributions.push_back(d); - } - } - } - - int version; - // CHECK: CURAND_CALL(hiprandGetVersion(&version)); - CURAND_CALL(curandGetVersion(&version)); - int runtime_version; - // cudaRuntimeGetVersion is yet unsupported by HIP - // CHECK: CUDA_CALL(hipRuntimeGetVersion(&runtime_version)); - CUDA_CALL(cudaRuntimeGetVersion(&runtime_version)); - int device_id; - // CHECK: CUDA_CALL(hipGetDevice(&device_id)); - // CHECK: hipDeviceProp_t props; - // CHECK: CUDA_CALL(hipGetDeviceProperties(&props, device_id)); - CUDA_CALL(cudaGetDevice(&device_id)); - cudaDeviceProp props; - CUDA_CALL(cudaGetDeviceProperties(&props, device_id)); - - std::cout << "cuRAND: " << version << " "; - std::cout << "Runtime: " << runtime_version << " "; - std::cout << "Device: " << props.name; - std::cout << std::endl << std::endl; - - for (auto engine : engines) - { - std::cout << engine << ":" << std::endl; - for (auto distribution : distributions) - { - std::cout << " " << distribution << ":" << std::endl; - const std::string plot_name = engine + "-" + distribution; - if (engine == "xorwow") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "mrg32k3a") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "philox") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "sobol32") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "mtgp32") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - } - } - - return 0; -} diff --git 
a/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp deleted file mode 100644 index 1fe8ad9ce2..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp +++ /dev/null @@ -1,494 +0,0 @@ -// The MIT License (MIT) -// -// Copyright (c) 2015 - 2016 Florian Rappl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -/* - This file is part of the C++ CmdParser utility. 
- Copyright (c) 2015 - 2016 Florian Rappl -*/ - -#pragma once -#include -#include -#include -#include -#include -#include - -namespace cli { -struct CallbackArgs { - const std::vector& arguments; - std::ostream& output; - std::ostream& error; -}; -class Parser { - private: - class CmdBase { - public: - explicit CmdBase(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant, - bool variadic) - : name(name), - command(name.size() > 0 ? "-" + name : ""), - alternative(alternative.size() > 0 ? "--" + alternative : ""), - description(description), - required(required), - handled(false), - arguments({}), - dominant(dominant), - variadic(variadic) {} - - virtual ~CmdBase() {} - - std::string name; - std::string command; - std::string alternative; - std::string description; - bool required; - bool handled; - std::vector arguments; - bool const dominant; - bool const variadic; - - virtual std::string print_value() const = 0; - virtual bool parse(std::ostream& output, std::ostream& error) = 0; - - bool is(const std::string& given) const { return given == command || given == alternative; } - }; - - template - struct ArgumentCountChecker { - static constexpr bool Variadic = false; - }; - - template - struct ArgumentCountChecker> { - static constexpr bool Variadic = true; - }; - - template - class CmdFunction final : public CmdBase { - public: - explicit CmdFunction(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant) - : CmdBase(name, alternative, description, required, dominant, - ArgumentCountChecker::Variadic) {} - - virtual bool parse(std::ostream& output, std::ostream& error) { - try { - CallbackArgs args{arguments, output, error}; - value = callback(args); - return true; - } catch (...) 
{ - return false; - } - } - - virtual std::string print_value() const { return ""; } - - std::function callback; - T value; - }; - - template - class CmdArgument final : public CmdBase { - public: - explicit CmdArgument(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant) - : CmdBase(name, alternative, description, required, dominant, - ArgumentCountChecker::Variadic) {} - - virtual bool parse(std::ostream&, std::ostream&) { - try { - value = Parser::parse(arguments, value); - return true; - } catch (...) { - return false; - } - } - - virtual std::string print_value() const { return stringify(value); } - - T value; - }; - - static int parse(const std::vector& elements, const int&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoi(elements[0]); - } - - static bool parse(const std::vector& elements, const bool& defval) { - if (elements.size() != 0) - throw std::runtime_error("A boolean command line parameter cannot have any arguments."); - - return !defval; - } - - static double parse(const std::vector& elements, const double&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stod(elements[0]); - } - - static float parse(const std::vector& elements, const float&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stof(elements[0]); - } - - static long double parse(const std::vector& elements, const long double&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stold(elements[0]); - } - - static unsigned int parse(const std::vector& elements, const unsigned int&) { - if (elements.size() != 1) throw std::bad_cast(); - - return static_cast(std::stoul(elements[0])); - } - - static unsigned long parse(const std::vector& elements, const unsigned long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoul(elements[0]); - } - - static unsigned long long parse(const std::vector& elements, - const unsigned 
long long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoull(elements[0]); - } - - static long parse(const std::vector& elements, const long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stol(elements[0]); - } - - static std::string parse(const std::vector& elements, const std::string&) { - if (elements.size() != 1) throw std::bad_cast(); - - return elements[0]; - } - - template - static std::vector parse(const std::vector& elements, const std::vector&) { - const T defval = T(); - std::vector values{}; - std::vector buffer(1); - - for (const auto& element : elements) { - buffer[0] = element; - values.push_back(parse(buffer, defval)); - } - - return values; - } - - template - static std::string stringify(const T& value) { - return std::to_string(value); - } - - template - static std::string stringify(const std::vector& values) { - std::stringstream ss{}; - ss << "[ "; - - for (const auto& value : values) { - ss << stringify(value) << " "; - } - - ss << "]"; - return ss.str(); - } - - static std::string stringify(const std::string& str) { return str; } - - public: - explicit Parser(int argc, const char** argv) : _appname(argv[0]) { - for (int i = 1; i < argc; ++i) { - _arguments.push_back(argv[i]); - } - enable_help(); - } - - explicit Parser(int argc, char** argv) : _appname(argv[0]) { - for (int i = 1; i < argc; ++i) { - _arguments.push_back(argv[i]); - } - enable_help(); - } - - ~Parser() { - for (int i = 0, n = _commands.size(); i < n; ++i) { - delete _commands[i]; - } - } - - bool has_help() const { - for (const auto command : _commands) { - if (command->name == "h" && command->alternative == "--help") { - return true; - } - } - - return false; - } - - void enable_help() { - set_callback("h", "help", std::function([this](CallbackArgs& args) { - args.output << this->usage(); - exit(0); - return false; - }), - "", true); - } - - void disable_help() { - for (auto command = _commands.begin(); command != 
_commands.end(); ++command) { - if ((*command)->name == "h" && (*command)->alternative == "--help") { - _commands.erase(command); - break; - } - } - } - - template - void set_default(bool is_required, const std::string& description = "") { - auto command = new CmdArgument{"", "", description, is_required, false}; - _commands.push_back(command); - } - - template - void set_required(const std::string& name, const std::string& alternative, - const std::string& description = "", bool dominant = false) { - auto command = new CmdArgument{name, alternative, description, true, dominant}; - _commands.push_back(command); - } - - template - void set_optional(const std::string& name, const std::string& alternative, T defaultValue, - const std::string& description = "", bool dominant = false) { - auto command = new CmdArgument{name, alternative, description, false, dominant}; - command->value = defaultValue; - _commands.push_back(command); - } - - template - void set_callback(const std::string& name, const std::string& alternative, - std::function callback, const std::string& description = "", - bool dominant = false) { - auto command = new CmdFunction{name, alternative, description, false, dominant}; - command->callback = callback; - _commands.push_back(command); - } - - inline void run_and_exit_if_error() { - if (run() == false) { - exit(1); - } - } - - inline bool run() { return run(std::cout, std::cerr); } - - inline bool run(std::ostream& output) { return run(output, std::cerr); } - - bool run(std::ostream& output, std::ostream& error) { - if (_arguments.size() > 0) { - auto current = find_default(); - - for (int i = 0, n = _arguments.size(); i < n; ++i) { - auto isarg = _arguments[i].size() > 0 && _arguments[i][0] == '-'; - auto associated = isarg ? 
find(_arguments[i]) : nullptr; - - if (associated != nullptr) { - current = associated; - associated->handled = true; - } else if (current == nullptr) { - error << no_default(); - return false; - } else { - current->arguments.push_back(_arguments[i]); - current->handled = true; - if (!current->variadic) { - // If the current command is not variadic, then no more arguments - // should be added to it. In this case, switch back to the default - // command. - current = find_default(); - } - } - } - } - - // First, parse dominant arguments since they succeed even if required - // arguments are missing. - for (auto command : _commands) { - if (command->handled && command->dominant && !command->parse(output, error)) { - error << howto_use(command); - return false; - } - } - - // Next, check for any missing arguments. - for (auto command : _commands) { - if (command->required && !command->handled) { - error << howto_required(command); - return false; - } - } - - // Finally, parse all remaining arguments. 
- for (auto command : _commands) { - if (command->handled && !command->dominant && !command->parse(output, error)) { - error << howto_use(command); - return false; - } - } - - return true; - } - - template - T get(const std::string& name) const { - for (const auto& command : _commands) { - if (command->name == name) { - auto cmd = dynamic_cast*>(command); - - if (cmd == nullptr) { - throw std::runtime_error("Invalid usage of the parameter " + name + - " detected."); - } - - return cmd->value; - } - } - - throw std::runtime_error("The parameter " + name + " could not be found."); - } - - template - T get_if(const std::string& name, std::function callback) const { - auto value = get(name); - return callback(value); - } - - int requirements() const { - int count = 0; - - for (const auto& command : _commands) { - if (command->required) { - ++count; - } - } - - return count; - } - - int commands() const { return static_cast(_commands.size()); } - - inline const std::string& app_name() const { return _appname; } - - protected: - CmdBase* find(const std::string& name) { - for (auto command : _commands) { - if (command->is(name)) { - return command; - } - } - - return nullptr; - } - - CmdBase* find_default() { - for (auto command : _commands) { - if (command->name == "") { - return command; - } - } - - return nullptr; - } - - std::string usage() const { - std::stringstream ss{}; - ss << "Available parameters:\n\n"; - - for (const auto& command : _commands) { - ss << " " << command->command << "\t" << command->alternative; - - if (command->required == true) { - ss << "\t(required)"; - } - - ss << "\n " << command->description; - - if (command->required == false) { - ss << "\n " - << "This parameter is optional. 
The default value is '" + command->print_value() - << "'."; - } - - ss << "\n\n"; - } - - return ss.str(); - } - - void print_help(std::stringstream& ss) const { - if (has_help()) { - ss << "For more help use --help or -h.\n"; - } - } - - std::string howto_required(CmdBase* command) const { - std::stringstream ss{}; - ss << "The parameter " << command->name << " is required.\n"; - ss << command->description << '\n'; - print_help(ss); - return ss.str(); - } - - std::string howto_use(CmdBase* command) const { - std::stringstream ss{}; - ss << "The parameter " << command->name << " has invalid arguments.\n"; - ss << command->description << '\n'; - print_help(ss); - return ss.str(); - } - - std::string no_default() const { - std::stringstream ss{}; - ss << "No default parameter has been specified.\n"; - ss << "The given argument must be used with a parameter.\n"; - print_help(ss); - return ss.str(); - } - - private: - const std::string _appname; - std::vector _arguments; - std::vector _commands; -}; -} // namespace cli diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu b/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu deleted file mode 100644 index 567de05e6e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu +++ /dev/null @@ -1,416 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Taken from: http://docs.nvidia.com/cuda/curand/device-api-overview.html#poisson-api-example -/* - * This program uses CURAND library for Poisson distribution - * to simulate queues in store for 16 hours. It shows the - * difference of using 3 different APIs: - * - HOST API -arrival of customers is described by Poisson(4) - * - SIMPLE DEVICE API -arrival of customers is described by - * Poisson(4*(sin(x/100)+1)), where x is number of minutes - * from store opening time. - * - ROBUST DEVICE API -arrival of customers is described by: - * - Poisson(2) for first 3 hours. 
- * - Poisson(1) for second 3 hours. - * - Poisson(3) after 6 hours. - */ - -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: #define CUDA_CALL(x) do { if((x) != hipSuccess) { -#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \ - printf("Error at %s:%d\n",__FILE__,__LINE__); \ - return EXIT_FAILURE;}} while(0) -// CHECK: #define CURAND_CALL(x) do { if((x)!=HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n",__FILE__,__LINE__);\ - return EXIT_FAILURE;}} while(0) - - -#define HOURS 16 -#define OPENING_HOUR 7 -#define CLOSING_HOUR (OPENING_HOUR + HOURS) - -#define access_2D(type, ptr, row, column, pitch)\ - *((type*)((char*)ptr + (row) * pitch) + column) - -enum API_TYPE { - HOST_API = 0, - SIMPLE_DEVICE_API = 1, - ROBUST_DEVICE_API = 2, -}; - -/* global variables */ -API_TYPE api; -int report_break; -int cashiers_load_h[HOURS]; -__constant__ int cashiers_load[HOURS]; -// CHECK: __global__ void setup_kernel(hiprandState *state) -__global__ void setup_kernel(curandState *state) -{ - int id = threadIdx.x + blockIdx.x * blockDim.x; - /* Each thread gets same seed, a different sequence - number, no offset */ - // CHECK: hiprand_init(1234, id, 0, &state[id]); - curand_init(1234, id, 0, &state[id]); -} - -__inline__ __device__ -void update_queue(int id, int min, unsigned int new_customers, - unsigned int &queue_length, - unsigned int *queue_lengths, size_t pitch) -{ - int balance; - balance = new_customers - 2 * cashiers_load[(min-1)/60]; - if (balance + (int)queue_length <= 0){ - queue_length = 0; - }else{ - queue_length += balance; - } - /* Store results */ - access_2D(unsigned int, queue_lengths, min-1, id, pitch) - = queue_length; -} - -// CHECK: __global__ void simple_device_API_kernel(hiprandState *state, -__global__ void simple_device_API_kernel(curandState *state, - unsigned int *queue_lengths, size_t pitch) -{ - int id = 
threadIdx.x + blockIdx.x * blockDim.x; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Copy state to local memory for efficiency */ - // CHECK: hiprandState localState = state[id]; - curandState localState = state[id]; - /* Simulate queue in time */ - for(int min = 1; min <= 60 * HOURS; min++) { - /* Draw number of new customers depending on API */ - // CHECK: new_customers = hiprand_poisson(&localState, - new_customers = curand_poisson(&localState, - 4*(sin((float)min/100.0)+1)); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* Copy state back to global memory */ - state[id] = localState; -} - - -__global__ void host_API_kernel(unsigned int *poisson_numbers, - unsigned int *queue_lengths, size_t pitch) -{ - int id = threadIdx.x + blockIdx.x * blockDim.x; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Simulate queue in time */ - for(int min = 1; min <= 60 * HOURS; min++) { - /* Get random number from global memory */ - new_customers = poisson_numbers - [blockDim.x * gridDim.x * (min -1) + id]; - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } -} -// CHECK: __global__ void robust_device_API_kernel(hiprandState *state, -// CHECK: hiprandDiscreteDistribution_t poisson_1, -// CHECK: hiprandDiscreteDistribution_t poisson_2, -// CHECK: hiprandDiscreteDistribution_t poisson_3, -__global__ void robust_device_API_kernel(curandState *state, - curandDiscreteDistribution_t poisson_1, - curandDiscreteDistribution_t poisson_2, - curandDiscreteDistribution_t poisson_3, - unsigned int *queue_lengths, size_t pitch) -{ - int id = threadIdx.x + blockIdx.x * 64; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Copy state to local memory for efficiency */ - // CHECK: hiprandState localState = state[id]; - curandState localState = state[id]; - /* Simulate queue in time */ - /* first 3 hours */ - for(int min = 1; min 
<= 60 * 3; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_2); - curand_discrete(&localState, poisson_2); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* second 3 hours */ - for(int min = 60 * 3 + 1; min <= 60 * 6; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_1); - curand_discrete(&localState, poisson_1); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* after 6 hours */ - for(int min = 60 * 6 + 1; min <= 60 * HOURS; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_3); - curand_discrete(&localState, poisson_3); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* Copy state back to global memory */ - state[id] = localState; -} - -/* Set time intervals between reports */ -void report_settings() -{ - do{ - printf("Set time intervals between queue reports"); - printf("(in minutes > 0)\n"); - if (scanf("%d", &report_break) == 0) continue; - }while(report_break <= 0); -} - - -/* Set number of cashiers each hour */ -void add_cachiers(int *cashiers_load) -{ - int i, min, max, begin, end; - printf("Cashier serves 2 customers per minute...\n"); - for (i = 0; i < HOURS; i++){ - cashiers_load_h[i] = 0; - } - while (true){ - printf("Adding cashier...\n"); - min = OPENING_HOUR; - max = CLOSING_HOUR-1; - do{ - printf("Set hour that cahier comes (%d-%d)", - min, max); - printf(" [type 0 to finish adding cashiers]\n"); - if (scanf("%d", &begin) == 0) continue; - }while (begin > max || (begin < min && begin != 0)); - if (begin == 0) break; - min = begin+1; - max = CLOSING_HOUR; - do{ - printf("Set hour that cahier leaves (%d-%d)", - min, max); - printf(" [type 0 to 
finish adding cashiers]\n"); - if (scanf("%d", &end) == 0) continue; - }while (end > max || (end < min && end != 0)); - if (end == 0) break; - for (i = begin - OPENING_HOUR; - i < end - OPENING_HOUR; i++){ - cashiers_load_h[i]++; - } - } - for (i = OPENING_HOUR; i < CLOSING_HOUR; i++){ - printf("\n%2d:00 - %2d:00 %d cashier", - i, i+1, cashiers_load_h[i-OPENING_HOUR]); - if (cashiers_load[i-OPENING_HOUR] != 1) printf("s"); - } - printf("\n"); -} - -/* Set API type */ -API_TYPE set_API_type() -{ - printf("Choose API type:\n"); - int choose; - do{ - printf("type 1 for HOST API\n"); - printf("type 2 for SIMPLE DEVICE API\n"); - printf("type 3 for ROBUST DEVICE API\n"); - if (scanf("%d", &choose) == 0) continue; - }while( choose < 1 || choose > 3); - switch(choose){ - case 1: return HOST_API; - case 2: return SIMPLE_DEVICE_API; - case 3: return ROBUST_DEVICE_API; - default: - fprintf(stderr, "wrong API\n"); - return HOST_API; - } -} - -void settings() -{ - add_cachiers(cashiers_load); - // CHECK: hipMemcpyToSymbol(HIP_SYMBOL("cashiers_load"), cashiers_load_h, - // CHECK: HOURS * sizeof(int), 0, hipMemcpyHostToDevice); - cudaMemcpyToSymbol("cashiers_load", cashiers_load_h, - HOURS * sizeof(int), 0, cudaMemcpyHostToDevice); - report_settings(); - api = set_API_type(); -} - -void print_statistics(unsigned int *hostResults, size_t pitch) -{ - int min, i, hour, minute; - unsigned int sum; - for(min = report_break; min <= 60 * HOURS; - min += report_break) { - sum = 0; - for(i = 0; i < 64 * 64; i++) { - sum += access_2D(unsigned int, hostResults, - min-1, i, pitch); - } - hour = OPENING_HOUR + min/60; - minute = min%60; - printf("%2d:%02d # of waiting customers = %10.4g |", - hour, minute, (float)sum/(64.0 * 64.0)); - printf(" # of cashiers = %d | ", - cashiers_load_h[(min-1)/60]); - printf("# of new customers/min ~= "); - switch (api){ - case HOST_API: - printf("%2.2f\n", 4.0); - break; - case SIMPLE_DEVICE_API: - printf("%2.2f\n", - 4*(sin((float)min/100.0)+1)); - break; - 
case ROBUST_DEVICE_API: - if (min <= 3 * 60){ - printf("%2.2f\n", 2.0); - }else{ - if (min <= 6 * 60){ - printf("%2.2f\n", 1.0); - }else{ - printf("%2.2f\n", 3.0); - } - } - break; - default: - fprintf(stderr, "Wrong API\n"); - } - } -} - -int main(int argc, char *argv[]) -{ - int n; - size_t pitch; - // CHECK: hiprandState *devStates; - curandState *devStates; - unsigned int *devResults, *hostResults; - unsigned int *poisson_numbers_d; - // CHECK: hiprandDiscreteDistribution_t poisson_1, poisson_2; - // CHECK: hiprandDiscreteDistribution_t poisson_3; - // CHECK: hiprandGenerator_t gen; - curandDiscreteDistribution_t poisson_1, poisson_2; - curandDiscreteDistribution_t poisson_3; - curandGenerator_t gen; - - /* Setting cashiers, report and API */ - settings(); - - /* Allocate space for results on device */ - // CHECK: CUDA_CALL(hipMallocPitch((void **)&devResults, &pitch, - CUDA_CALL(cudaMallocPitch((void **)&devResults, &pitch, - 64 * 64 * sizeof(unsigned int), 60 * HOURS)); - - /* Allocate space for results on host */ - hostResults = (unsigned int *)calloc(pitch * 60 * HOURS, - sizeof(unsigned int)); - - /* Allocate space for prng states on device */ - // CHECK: CUDA_CALL(hipMalloc((void **)&devStates, 64 * 64 * - // CHECK: sizeof(hiprandState))); - CUDA_CALL(cudaMalloc((void **)&devStates, 64 * 64 * - sizeof(curandState))); - - /* Setup prng states */ - if (api != HOST_API){ - // CHECK: hipLaunchKernelGGL(setup_kernel, dim3(64), dim3(64), 0, 0, devStates); - setup_kernel<<<64, 64>>>(devStates); - } - /* Simulate queue */ - switch (api){ - case HOST_API: - /* Create pseudo-random number generator */ - // CHECK: CURAND_CALL(hiprandCreateGenerator(&gen, - // CHECK: HIPRAND_RNG_PSEUDO_DEFAULT)); - CURAND_CALL(curandCreateGenerator(&gen, - CURAND_RNG_PSEUDO_DEFAULT)); - /* Set seed */ - // CHECK: CURAND_CALL(hiprandSetPseudoRandomGeneratorSeed( - CURAND_CALL(curandSetPseudoRandomGeneratorSeed( - gen, 1234ULL)); - /* compute n */ - n = 64 * 64 * HOURS * 60; - /* 
Allocate n unsigned ints on device */ - // CHECK: CUDA_CALL(hipMalloc((void **)&poisson_numbers_d, - CUDA_CALL(cudaMalloc((void **)&poisson_numbers_d, - n * sizeof(unsigned int))); - /* Generate n unsigned ints on device */ - // CHECK: CURAND_CALL(hiprandGeneratePoisson(gen, - CURAND_CALL(curandGeneratePoisson(gen, - poisson_numbers_d, n, 4.0)); - // CHECK: hipLaunchKernelGGL(host_API_kernel, dim3(64), dim3(64), 0, 0, poisson_numbers_d, - host_API_kernel<<<64, 64>>>(poisson_numbers_d, - devResults, pitch); - /* Cleanup */ - // CHECK: CURAND_CALL(hiprandDestroyGenerator(gen)); - CURAND_CALL(curandDestroyGenerator(gen)); - break; - case SIMPLE_DEVICE_API: - // CHECK: hipLaunchKernelGGL(simple_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates, - simple_device_API_kernel<<<64, 64>>>(devStates, - devResults, pitch); - break; - case ROBUST_DEVICE_API: - /* Create histograms for Poisson(1) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(1.0, - CURAND_CALL(curandCreatePoissonDistribution(1.0, - &poisson_1)); - /* Create histograms for Poisson(2) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(2.0, - CURAND_CALL(curandCreatePoissonDistribution(2.0, - &poisson_2)); - /* Create histograms for Poisson(3) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(3.0, - CURAND_CALL(curandCreatePoissonDistribution(3.0, - &poisson_3)); - // CHECK: hipLaunchKernelGGL(robust_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates, - robust_device_API_kernel<<<64, 64>>>(devStates, - poisson_1, poisson_2, poisson_3, - devResults, pitch); - /* Cleanup */ - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_1)); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_2)); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_3)); - CURAND_CALL(curandDestroyDistribution(poisson_1)); - CURAND_CALL(curandDestroyDistribution(poisson_2)); - CURAND_CALL(curandDestroyDistribution(poisson_3)); - break; - default: - fprintf(stderr, "Wrong API\n"); - } 
- /* Copy device memory to host */ - // CHECK: CUDA_CALL(hipMemcpy2D(hostResults, pitch, devResults, - // CHECK: 60 * HOURS, hipMemcpyDeviceToHost)); - CUDA_CALL(cudaMemcpy2D(hostResults, pitch, devResults, - pitch, 64 * 64 * sizeof(unsigned int), - 60 * HOURS, cudaMemcpyDeviceToHost)); - /* Show result */ - print_statistics(hostResults, pitch); - /* Cleanup */ - // CHECK: CUDA_CALL(hipFree(devStates)); - // CHECK: CUDA_CALL(hipFree(devResults)); - CUDA_CALL(cudaFree(devStates)); - CUDA_CALL(cudaFree(devResults)); - free(hostResults); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu deleted file mode 100644 index 6e163d47a6..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu +++ /dev/null @@ -1,367 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -// CHECK: if (y) hipFree(y); -// CHECK: if (z) hipFree(z); -// CHECK: if (xInd) hipFree(xInd); -// CHECK: if (xVal) hipFree(xVal); -// CHECK: if (csrRowPtr) hipFree(csrRowPtr); -// CHECK: if (cooRowIndex) hipFree(cooRowIndex); -// CHECK: if (cooColIndex) hipFree(cooColIndex); -// CHECK: if (cooVal) hipFree(cooVal); -// CHECK: if (descr) hipsparseDestroyMatDescr(descr); -// CHECK: if (handle) hipsparseDestroy(handle); -// CHECK: hipDeviceReset(); -#define CLEANUP(s) \ -do { \ - printf ("%s\n", s); \ - if (yHostPtr) free(yHostPtr); \ - if (zHostPtr) free(zHostPtr); \ - if (xIndHostPtr) free(xIndHostPtr); \ - if (xValHostPtr) free(xValHostPtr); \ - if (cooRowIndexHostPtr) free(cooRowIndexHostPtr);\ - if (cooColIndexHostPtr) free(cooColIndexHostPtr);\ - if (cooValHostPtr) free(cooValHostPtr); \ - if (y) cudaFree(y); \ - if (z) cudaFree(z); \ - if (xInd) cudaFree(xInd); \ - if (xVal) cudaFree(xVal); \ - if (csrRowPtr) cudaFree(csrRowPtr); \ - if 
(cooRowIndex) cudaFree(cooRowIndex); \ - if (cooColIndex) cudaFree(cooColIndex); \ - if (cooVal) cudaFree(cooVal); \ - if (descr) cusparseDestroyMatDescr(descr);\ - if (handle) cusparseDestroy(handle); \ - cudaDeviceReset(); \ - fflush (stdout); \ -} while (0) - -int main(){ - // CHECK: hipError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6; - cudaError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6; - // CHECK: hipsparseStatus_t status; - cusparseStatus_t status; - // CHECK: hipsparseHandle_t handle=0; - cusparseHandle_t handle=0; - // CHECK: hipsparseMatDescr_t descr=0; - cusparseMatDescr_t descr=0; - int * cooRowIndexHostPtr=0; - int * cooColIndexHostPtr=0; - double * cooValHostPtr=0; - int * cooRowIndex=0; - int * cooColIndex=0; - double * cooVal=0; - int * xIndHostPtr=0; - double * xValHostPtr=0; - double * yHostPtr=0; - int * xInd=0; - double * xVal=0; - double * y=0; - int * csrRowPtr=0; - double * zHostPtr=0; - double * z=0; - int n, nnz, nnz_vector; - double dzero =0.0; - double dtwo =2.0; - double dthree=3.0; - double dfive =5.0; - printf("testing example\n"); - /* create the following sparse test matrix in COO format */ - /* |1.0 2.0 3.0| - | 4.0 | - |5.0 6.0 7.0| - | 8.0 9.0| */ - n=4; nnz=9; - cooRowIndexHostPtr = (int *) malloc(nnz*sizeof(cooRowIndexHostPtr[0])); - cooColIndexHostPtr = (int *) malloc(nnz*sizeof(cooColIndexHostPtr[0])); - cooValHostPtr = (double *)malloc(nnz*sizeof(cooValHostPtr[0])); - if ((!cooRowIndexHostPtr) || (!cooColIndexHostPtr) || (!cooValHostPtr)){ - CLEANUP("Host malloc failed (matrix)"); - return 1; - } - cooRowIndexHostPtr[0]=0; cooColIndexHostPtr[0]=0; cooValHostPtr[0]=1.0; - cooRowIndexHostPtr[1]=0; cooColIndexHostPtr[1]=2; cooValHostPtr[1]=2.0; - cooRowIndexHostPtr[2]=0; cooColIndexHostPtr[2]=3; cooValHostPtr[2]=3.0; - cooRowIndexHostPtr[3]=1; cooColIndexHostPtr[3]=1; cooValHostPtr[3]=4.0; - cooRowIndexHostPtr[4]=2; cooColIndexHostPtr[4]=0; cooValHostPtr[4]=5.0; - cooRowIndexHostPtr[5]=2; 
cooColIndexHostPtr[5]=2; cooValHostPtr[5]=6.0; - cooRowIndexHostPtr[6]=2; cooColIndexHostPtr[6]=3; cooValHostPtr[6]=7.0; - cooRowIndexHostPtr[7]=3; cooColIndexHostPtr[7]=1; cooValHostPtr[7]=8.0; - cooRowIndexHostPtr[8]=3; cooColIndexHostPtr[8]=3; cooValHostPtr[8]=9.0; - nnz_vector = 3; - xIndHostPtr = (int *) malloc(nnz_vector*sizeof(xIndHostPtr[0])); - xValHostPtr = (double *)malloc(nnz_vector*sizeof(xValHostPtr[0])); - yHostPtr = (double *)malloc(2*n *sizeof(yHostPtr[0])); - zHostPtr = (double *)malloc(2*(n+1) *sizeof(zHostPtr[0])); - if((!xIndHostPtr) || (!xValHostPtr) || (!yHostPtr) || (!zHostPtr)) { - CLEANUP("Host malloc failed (vectors)"); - return 1; - } - yHostPtr[0] = 10.0; - xIndHostPtr[0]=0; - xValHostPtr[0]=100.0; - yHostPtr[1] = 20.0; - xIndHostPtr[1]=1; - xValHostPtr[1]=200.0; - yHostPtr[2] = 30.0; - yHostPtr[3] = 40.0; - xIndHostPtr[2]=3; - xValHostPtr[2]=400.0; - yHostPtr[4] = 50.0; - yHostPtr[5] = 60.0; - yHostPtr[6] = 70.0; - yHostPtr[7] = 80.0; - /* allocate GPU memory and copy the matrix and vectors into it */ - // CHECK: cudaStat1 = hipMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0])); - cudaStat1 = cudaMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0])); - // CHECK: cudaStat2 = hipMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0])); - cudaStat2 = cudaMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0])); - // CHECK: cudaStat3 = hipMalloc((void**)&cooVal, nnz*sizeof(cooVal[0])); - cudaStat3 = cudaMalloc((void**)&cooVal, nnz*sizeof(cooVal[0])); - // CHECK: cudaStat4 = hipMalloc((void**)&y, 2*n*sizeof(y[0])); - cudaStat4 = cudaMalloc((void**)&y, 2*n*sizeof(y[0])); - // CHECK: cudaStat5 = hipMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0])); - cudaStat5 = cudaMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0])); - // CHECK: cudaStat6 = hipMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0])); - cudaStat6 = cudaMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0])); - // CHECK: if ((cudaStat1 != hipSuccess) || - // CHECK: (cudaStat2 != hipSuccess) 
|| - // CHECK: (cudaStat3 != hipSuccess) || - // CHECK: (cudaStat4 != hipSuccess) || - // CHECK: (cudaStat5 != hipSuccess) || - // CHECK: (cudaStat6 != hipSuccess)) { - if ((cudaStat1 != cudaSuccess) || - (cudaStat2 != cudaSuccess) || - (cudaStat3 != cudaSuccess) || - (cudaStat4 != cudaSuccess) || - (cudaStat5 != cudaSuccess) || - (cudaStat6 != cudaSuccess)) { - CLEANUP("Device malloc failed"); - return 1; - } - // CHECK: cudaStat1 = hipMemcpy(cooRowIndex, cooRowIndexHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(cooRowIndex, cooRowIndexHostPtr, - (size_t)(nnz*sizeof(cooRowIndex[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(cooColIndex, cooColIndexHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(cooColIndex, cooColIndexHostPtr, - (size_t)(nnz*sizeof(cooColIndex[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(cooVal, cooValHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(cooVal, cooValHostPtr, - (size_t)(nnz*sizeof(cooVal[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat4 = hipMemcpy(y, yHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat4 = cudaMemcpy(y, yHostPtr, - (size_t)(2*n*sizeof(y[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat5 = hipMemcpy(xInd, xIndHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat5 = cudaMemcpy(xInd, xIndHostPtr, - (size_t)(nnz_vector*sizeof(xInd[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat6 = hipMemcpy(xVal, xValHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat6 = cudaMemcpy(xVal, xValHostPtr, - (size_t)(nnz_vector*sizeof(xVal[0])), - cudaMemcpyHostToDevice); - // CHECK: if ((cudaStat1 != hipSuccess) || - // CHECK: (cudaStat2 != hipSuccess) || - // CHECK: (cudaStat3 != hipSuccess) || - // CHECK: (cudaStat4 != hipSuccess) || - // CHECK: (cudaStat5 != hipSuccess) || - // CHECK: (cudaStat6 != hipSuccess)) { - if ((cudaStat1 != cudaSuccess) || - (cudaStat2 != cudaSuccess) || - (cudaStat3 != cudaSuccess) 
|| - (cudaStat4 != cudaSuccess) || - (cudaStat5 != cudaSuccess) || - (cudaStat6 != cudaSuccess)) { - CLEANUP("Memcpy from Host to Device failed"); - return 1; - } - /* initialize cusparse library */ - // CHECK: status= hipsparseCreate(&handle); - status= cusparseCreate(&handle); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("CUSPARSE Library initialization failed"); - return 1; - } - /* create and setup matrix descriptor */ - // CHECK: status= hipsparseCreateMatDescr(&descr); - status= cusparseCreateMatDescr(&descr); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix descriptor initialization failed"); - return 1; - } - // CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - /* exercise conversion routines (convert matrix from COO 2 CSR format) */ - // CHECK: cudaStat1 = hipMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0])); - cudaStat1 = cudaMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Device malloc failed (csrRowPtr)"); - return 1; - } - status= cusparseXcoo2csr(handle,cooRowIndex,nnz,n, - // CHECK: csrRowPtr,HIPSPARSE_INDEX_BASE_ZERO); - csrRowPtr,CUSPARSE_INDEX_BASE_ZERO); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Conversion from COO to CSR format failed"); - return 1; - } - //csrRowPtr = [0 3 4 7 9] - // The following test only works for compute capability 1.3 and above - // because it needs double precision. 
- int devId; - // CHECK: hipDeviceProp_t prop; - cudaDeviceProp prop; - // CHECK: hipError_t cudaStat; - cudaError_t cudaStat; - // CHECK: cudaStat = hipGetDevice(&devId); - cudaStat = cudaGetDevice(&devId); - // CHECK: if (hipSuccess != cudaStat){ - if (cudaSuccess != cudaStat){ - // CLEANUP("hipGetDevice failed"); - CLEANUP("cudaGetDevice failed"); - // printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat)); - printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat)); - return 1; - } - // CHECK: cudaStat = hipGetDeviceProperties( &prop, devId); - cudaStat = cudaGetDeviceProperties( &prop, devId); - // CHECK: if (hipSuccess != cudaStat) { - if (cudaSuccess != cudaStat) { - // CHECK: CLEANUP("hipGetDeviceProperties failed"); - CLEANUP("cudaGetDeviceProperties failed"); - // CHECK: printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat)); - printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat)); - return 1; - } - int cc = 100*prop.major + 10*prop.minor; - if (cc < 130){ - CLEANUP("waive the test because only sm13 and above are supported\n"); - printf("the device has compute capability %d\n", cc); - printf("example test WAIVED"); - return 2; - } - /* exercise Level 1 routines (scatter vector elements) */ - // CHECK: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd, - // CHECK: &y[n], HIPSPARSE_INDEX_BASE_ZERO); - status= cusparseDsctr(handle, nnz_vector, xVal, xInd, - &y[n], CUSPARSE_INDEX_BASE_ZERO); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Scatter from sparse to dense vector failed"); - return 1; - } - //y = [10 20 30 40 | 100 200 70 400] - /* exercise Level 2 routines (csrmv) */ - // CHECK: status= hipsparseDcsrmv(handle,HIPSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz, - status= cusparseDcsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz, - &dtwo, descr, cooVal, csrRowPtr, cooColIndex, - &y[0], &dthree, &y[n]); - // 
CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix-vector multiplication failed"); - return 1; - } - //y = [10 20 30 40 | 680 760 1230 2240] - // CHECK: hipMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), hipMemcpyDeviceToHost); - cudaMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), cudaMemcpyDeviceToHost); - /* exercise Level 3 routines (csrmm) */ - // cudaStat1 = hipMalloc((void**)&z, 2*(n+1)*sizeof(z[0])); - cudaStat1 = cudaMalloc((void**)&z, 2*(n+1)*sizeof(z[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Device malloc failed (z)"); - return 1; - } - // CHECK: cudaStat1 = hipMemset((void *)z,0, 2*(n+1)*sizeof(z[0])); - cudaStat1 = cudaMemset((void *)z,0, 2*(n+1)*sizeof(z[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Memset on Device failed"); - return 1; - } - // CHECK: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, - status= cusparseDcsrmm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, - nnz, &dfive, descr, cooVal, csrRowPtr, cooColIndex, - y, n, &dzero, z, n+1); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix-matrix multiplication failed"); - return 1; - } - /* print final results (z) */ - // CHECK: cudaStat1 = hipMemcpy(zHostPtr, z, - // CHECK: hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(zHostPtr, z, - (size_t)(2*(n+1)*sizeof(z[0])), - cudaMemcpyDeviceToHost); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Memcpy from Device to Host failed"); - return 1; - } - //z = [950 400 2550 2600 0 | 49300 15200 132300 131200 0] - /* destroy matrix descriptor */ - // status = hipsparseDestroyMatDescr(descr); - status = cusparseDestroyMatDescr(descr); - descr = 0; - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix 
descriptor destruction failed"); - return 1; - } - /* destroy handle */ - // CHECK: status = hipsparseDestroy(handle); - status = cusparseDestroy(handle); - handle = 0; - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("CUSPARSE Library release of resources failed"); - return 1; - } - /* check the results */ - // Notice that CLEANUP() contains a call to cusparseDestroy(handle) - if ((zHostPtr[0] != 950.0) || - (zHostPtr[1] != 400.0) || - (zHostPtr[2] != 2550.0) || - (zHostPtr[3] != 2600.0) || - (zHostPtr[4] != 0.0) || - (zHostPtr[5] != 49300.0) || - (zHostPtr[6] != 15200.0) || - (zHostPtr[7] != 132300.0) || - (zHostPtr[8] != 131200.0) || - (zHostPtr[9] != 0.0) || - (yHostPtr[0] != 10.0) || - (yHostPtr[1] != 20.0) || - (yHostPtr[2] != 30.0) || - (yHostPtr[3] != 40.0) || - (yHostPtr[4] != 680.0) || - (yHostPtr[5] != 760.0) || - (yHostPtr[6] != 1230.0) || - (yHostPtr[7] != 2240.0)) { - CLEANUP("example test FAILED"); - return 1; - } else { - CLEANUP("example test PASSED"); - return 0; - } -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu deleted file mode 100644 index 57b2c61098..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu +++ /dev/null @@ -1,284 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -void printMatrix(int m, int n, const double*A, int lda, const char* name) -{ - for(int row = 0 ; row < m ; row++){ - for(int col = 0 ; col < n ; col++){ - double Areg = A[row + col*lda]; - printf("%s(%d,%d) = %f\n", name, row+1, col+1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipsparseHandle_t 
cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipsparseStatus_t cusparseStat = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t cusparseStat = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int n = 4; - const int nnzA = 9; -/* - * | 1 0 2 3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - * eigevales are { -0.5311, 7.5311, 9.0000, 4.0000 } - * - * The largest eigenvaluse is 9 and corresponding eigenvector is - * - * | 0.3029 | - * v = | 0 | - * | 0.9350 | - * | 0.1844 | - */ - const int csrRowPtrA[n+1] = { 0, 3, 4, 7, 9 }; - const int csrColIndA[nnzA] = {0, 2, 3, 1, 0, 2, 3, 1, 3 }; - const double csrValA[nnzA] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; - const double lambda_exact[n] = { 9.0000, 7.5311, 4.0000, -0.5311 }; - const double x0[n] = {1.0, 2.0, 3.0, 4.0 }; /* initial guess */ - double x[n]; /* numerical eigenvector */ - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - double *d_csrValA = NULL; - - double *d_x = NULL; /* eigenvector */ - double *d_y = NULL; /* workspace */ - - const double tol = 1.e-6; - const int max_ites = 30; - - const double h_one = 1.0; - const double h_zero = 0.0; - - printf("example of csrmv_mp \n"); - printf("tol = %E \n", tol); - printf("max. 
iterations = %d \n", max_ites); - - printf("1st eigenvaluse is %f\n", lambda_exact[0] ); - printf("2nd eigenvaluse is %f\n", lambda_exact[1] ); - - double alpha = lambda_exact[1]/lambda_exact[0] ; - printf("convergence rate is %f\n", alpha ); - - double est_iterations = log(tol)/log(alpha); - printf("# of iterations required is %d\n", (int)ceil(est_iterations)); - - // step 1: create cublas/cusparse handle, bind a stream - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cusparseStat = hipsparseCreate(&cusparseH); - cusparseStat = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - // CHECK: cusparseStat = hipsparseSetStream(cusparseH, stream); - cusparseStat = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - - // step 2: configuration of matrix A - // CHECK: cusparseStat = hipsparseCreateMatDescr(&descrA); - cusparseStat = cusparseCreateMatDescr(&descrA); - // assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - // CHECK: hipsparseSetMatIndexBase(descrA,HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL ); - 
cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL ); - - // step 3: copy A and x0 to device - // CHECK: cudaStat1 = hipMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) ); - cudaStat1 = cudaMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) ); - // CHECK: cudaStat2 = hipMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA ); - cudaStat2 = cudaMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA ); - // CHECK: cudaStat3 = hipMalloc ((void**)&d_csrValA , sizeof(double) * nnzA ); - cudaStat3 = cudaMalloc ((void**)&d_csrValA , sizeof(double) * nnzA ); - // CHECK: cudaStat4 = hipMalloc ((void**)&d_x , sizeof(double) * n ); - cudaStat4 = cudaMalloc ((void**)&d_x , sizeof(double) * n ); - // CHECK: cudaStat5 = hipMalloc ((void**)&d_y , sizeof(double) * n ); - cudaStat5 = cudaMalloc ((void**)&d_y , sizeof(double) * n ); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat2); 
- // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat3); - - // step 4: power method - double lambda = 0.0; - double lambda_next = 0.0; - - // 4.1: initial guess x0 - cudaStat1 = cudaMemcpy(d_x, x0, sizeof(double) * n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - for(int ite = 0 ; ite < max_ites ; ite++ ){ - // 4.2: normalize vector x - // x = x / |x| - double nrm2_x; - // TODO: cublasStat = hipblasDnrm2_v2(cublasH, - cublasStat = cublasDnrm2_v2(cublasH, - n, - d_x, - 1, // incx, - &nrm2_x /* host pointer */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - double one_over_nrm2_x = 1.0 / nrm2_x; - // TODO: cublasStat = hipblasDscal_v2( cublasH, - cublasStat = cublasDscal_v2( cublasH, - n, - &one_over_nrm2_x, /* host pointer */ - d_x, - 1 // incx - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - // 4.3: y = A*x - // TODO: hipsparseStat = cusparseDcsrmv_mp(cusparseH, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE - cusparseStat = cusparseDcsrmv_mp(cusparseH, - CUSPARSE_OPERATION_NON_TRANSPOSE, - n, - n, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_x, - &h_zero, - d_y); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - - // 4.4: lambda = y**T*x - // TODO: cublasStat = hipblasDdot_v2 ( cublasH, - cublasStat = cublasDdot_v2 ( cublasH, - n, - d_x, - 1, // incx, - d_y, - 1, // incy, - &lambda_next /* host pointer */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - double lambda_err = fabs( lambda_next - lambda_exact[0] ); - printf("ite %d: lambda = %f, error = %E\n", ite, lambda_next, lambda_err ); - - // 4.5: check if converges - if ( (ite > 0) && - fabs( lambda - lambda_next ) < tol - ){ - break; // 
converges - } - - /* - * 4.6: x := y - * lambda = lambda_next - * - * so new approximation is (lambda, x), x is not normalized. - */ - // CHECK: cudaStat1 = hipMemcpy(d_x, d_y, sizeof(double) * n , hipMemcpyDeviceToDevice); - cudaStat1 = cudaMemcpy(d_x, d_y, sizeof(double) * n , cudaMemcpyDeviceToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - lambda = lambda_next; - } - // step 5: report eigen-pair - // CHECK: cudaStat1 = hipMemcpy(x, d_x, sizeof(double) * n, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(x, d_x, sizeof(double) * n, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("largest eigenvalue is %E\n", lambda ); - printf("eigenvector = (matlab base-1)\n"); - printMatrix(n, 1, x, n, "V0"); - printf("=====\n"); - - // free resources - // CHECK: if (d_csrRowPtrA ) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA ) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA ) hipFree(d_csrColIndA); - if (d_csrColIndA ) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA ) hipFree(d_csrValA); - if (d_csrValA ) cudaFree(d_csrValA); - // CHECK: if (d_x ) hipFree(d_x); - if (d_x ) cudaFree(d_x); - // CHeCK: if (d_y ) hipFree(d_y); - if (d_y ) cudaFree(d_y); - // CHECK: if (cublasH ) hipblasDestroy(cublasH); - if (cublasH ) cublasDestroy(cublasH); - // CHECK: if (cusparseH ) hipsparseDestroy(cusparseH); - if (cusparseH ) cusparseDestroy(cusparseH); - // CHECK: if (stream ) hipStreamDestroy(stream); - if (stream ) cudaStreamDestroy(stream); - // CHECK: if (descrA ) hipsparseDestroyMatDescr(descrA); - if (descrA ) cusparseDestroyMatDescr(descrA); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu deleted file mode 100644 index 8618be4748..0000000000 --- 
a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - // CHECK: hipError_t cudaStat6 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - cudaError_t cudaStat6 = cudaSuccess; - - /* - * A is a 3x3 sparse matrix - * | 1 2 0 | - * A = | 0 5 0 | - * | 0 8 0 | - */ - const int m = 3; - const int n = 3; - const int nnz = 4; - -#if 0 - /* index starts at 0 */ - int h_cooRows[nnz] = { 2, 1, 0, 0 }; - int h_cooCols[nnz] = { 1, 1, 0, 1 }; -#else - /* index starts at -2 */ - int h_cooRows[nnz] = { 0, -1, -2, -2 }; - int h_cooCols[nnz] = { -1, -1, -2, -1 }; -#endif - double h_cooVals[nnz] = { 8.0, 5.0, 1.0, 2.0 }; - int h_P[nnz]; - - int *d_cooRows = NULL; - int *d_cooCols = NULL; - int *d_P = NULL; - double *d_cooVals = NULL; - double *d_cooVals_sorted = NULL; - size_t pBufferSizeInBytes = 0; - void *pBuffer = NULL; - - printf("m = %d, n = %d, nnz=%d \n", m, n, nnz); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = 
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: allocate buffer */ - // TODO: status = hipsparseXcoosort_bufferSizeExt( - status = cusparseXcoosort_bufferSizeExt( - handle, - m, - n, - nnz, - d_cooRows, - d_cooCols, - &pBufferSizeInBytes - ); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("pBufferSizeInBytes = %lld bytes \n", (long long)pBufferSizeInBytes); - - // CHECK: cudaStat1 = hipMalloc(&d_cooRows, sizeof(int)*nnz); - cudaStat1 = cudaMalloc(&d_cooRows, sizeof(int)*nnz); - // CHECK: cudaStat2 = hipMalloc(&d_cooCols, sizeof(int)*nnz); - cudaStat2 = cudaMalloc(&d_cooCols, sizeof(int)*nnz); - // CHECK: cudaStat3 = hipMalloc(&d_P, sizeof(int)*nnz); - cudaStat3 = cudaMalloc(&d_P, sizeof(int)*nnz); - // CHECK: cudaStat4 = hipMalloc(&d_cooVals, sizeof(double)*nnz); - cudaStat4 = cudaMalloc(&d_cooVals, sizeof(double)*nnz); - // CHECK: cudaStat5 = hipMalloc(&d_cooVals_sorted, sizeof(double)*nnz); - cudaStat5 = cudaMalloc(&d_cooVals_sorted, sizeof(double)*nnz); - // CHECK: cudaStat6 = hipMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); - cudaStat6 = cudaMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); - - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - // CHECK: assert(hipSuccess == cudaStat6); - assert(cudaSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - assert(cudaSuccess == cudaStat6); - - // CHECK: cudaStat1 = hipMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat4 = hipDeviceSynchronize(); - cudaStat4 = cudaDeviceSynchronize(); - - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - - /* step 3: setup permutation vector P to identity */ - // TODO: status = hipsparseCreateIdentityPermutation( - status = cusparseCreateIdentityPermutation( - handle, - nnz, - d_P); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 4: sort COO format by Row */ - // TODO: status = hipsparseXcoosortByRow( - status = cusparseXcoosortByRow( - handle, - m, - n, - nnz, - d_cooRows, - d_cooCols, - d_P, - pBuffer - ); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 5: gather sorted cooVals */ - // CHECK: status = hipsparseDgthr( - // CHECK: HIPSPARSE_INDEX_BASE_ZERO - status = cusparseDgthr( - handle, - nnz, - d_cooVals, - d_cooVals_sorted, - d_P, - CUSPARSE_INDEX_BASE_ZERO - ); - // CHECK: 
assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* wait until the computation is done */ - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: cudaStat2 = hipMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat4 = hipMemcpy(h_P, d_P, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat4 = cudaMemcpy(h_P, d_P, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat5 = hipMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, hipMemcpyDeviceToHost); - cudaStat5 = cudaMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat6 = hipDeviceSynchronize(); - cudaStat6 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - // CHECK: assert(hipSuccess == cudaStat6); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - assert(cudaSuccess == cudaStat6); - - printf("sorted coo: \n"); - for (int j = 0; j < nnz; j++) { - printf("(%d, %d, %f) \n", h_cooRows[j], h_cooCols[j], h_cooVals[j]); - } - - for (int j = 0; j < nnz; j++) { - printf("P[%d] = %d \n", j, h_P[j]); - } - - /* free resources */ - // CHECK: if (d_cooRows) hipFree(d_cooRows); - if (d_cooRows) cudaFree(d_cooRows); - // CHECK: if (d_cooCols) hipFree(d_cooCols); - if (d_cooCols) cudaFree(d_cooCols); - // CHECK: if (d_P) hipFree(d_P); - if (d_P) 
cudaFree(d_P); - // CHECK: if (d_cooVals) hipFree(d_cooVals); - if (d_cooVals) cudaFree(d_cooVals); - // CHECK: if (d_cooVals_sorted) hipFree(d_cooVals_sorted); - if (d_cooVals_sorted) cudaFree(d_cooVals_sorted); - // CHECK: if (pBuffer) hipFree(pBuffer); - if (pBuffer) cudaFree(pBuffer); - // if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu deleted file mode 100644 index 18ba9b006c..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu +++ /dev/null @@ -1,261 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printMatrix(int m, int n, const float*A, int lda, const char* name) -{ - for (int row = 0; row < m; row++) { - for (int col = 0; col < n; col++) { - float Areg = A[row + col * lda]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int m = 4; - const int n = 4; - const int lda = m; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - float *d_A = NULL; - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float threshold = 4.1; /* remove Aij <= 4.1 */ -// float threshold = 0; /* remove zeros */ - - printf("example of pruneDense2csr \n"); - - printf("prune |A(i,j)| <= threshold \n"); - printf("threshold = %E \n", threshold); - - printMatrix(m, n, A, lda, "A"); - - /* step 1: create 
cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); - cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - - /* step 3: query workspace */ - // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneDense2csr_bufferSizeExt( - status = cusparseSpruneDense2csr_bufferSizeExt( 
- handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneDense2csrNnz( - status = cusparseSpruneDense2csrNnz( - handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - // TODO: status = hipsparseSpruneDense2csr( - status = cusparseSpruneDense2csr( - handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - 
// CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_A) hipFree(d_A); - if (d_A) cudaFree(d_A); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrC) 
hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu deleted file mode 100644 index 1155af09e1..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu +++ /dev/null @@ -1,288 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int m = 4; - const int n = 4; - const int nnzA = 9; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - - const int csrRowPtrA[m + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float threshold = 4.1; /* remove Aij <= 4.1 */ -// float threshold = 0; /* remove zeros */ - - printf("example of pruneCsr2csr \n"); - - printf("prune |A(i,j)| <= threshold \n"); - printf("threshold = %E \n", threshold); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - 
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix A and C */ - // CHECK: status = hipsparseCreateMatDescr(&descrA); - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* C is base-0 */ - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - - printCsr(m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, "A"); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, 
sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneCsr2csr_bufferSizeExt( - status = cusparseSpruneCsr2csr_bufferSizeExt( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // cudaStat1 = 
hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneCsr2csrNnz( - status = cusparseSpruneCsr2csrNnz( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneCsr2csr( - status = cusparseSpruneCsr2csr( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = 
(float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // 
CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu deleted file mode 100644 index 872750a5d5..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu +++ /dev/null @@ -1,269 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printMatrix(int m, int n, const float*A, int lda, const char* name) -{ - for (int row = 0; row < m; row++) { - for (int col = 0; col < n; col++) { - float Areg = A[row + col * lda]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - pruneInfo_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int m = 4; - const int n = 4; - const int lda = m; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - float *d_A = NULL; - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float percentage = 50; /* 50% of nnz */ - - printf("example of pruneDense2csrByPercentage \n"); - - printf("prune out %.1f percentage of A \n", percentage); - - printMatrix(m, n, A, lda, "A"); - - /* step 1: create cusparse handle, bind a 
stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // TODO: status = hipsparseCreatePruneInfo(&info); - status = cusparseCreatePruneInfo(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); - cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat1); - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneDense2csrByPercentage_bufferSizeExt( - status = cusparseSpruneDense2csrByPercentage_bufferSizeExt( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneDense2csrNnzByPercentage( - status = cusparseSpruneDense2csrNnzByPercentage( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat2); - // TODO: status = hipsparseSpruneDense2csrByPercentage( - status 
= cusparseSpruneDense2csrByPercentage( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 7: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_A) hipFree(d_A); - if (d_A) cudaFree(d_A); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if 
(csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // TODO: if (info) hipsparseDestroyPruneInfo(info); - if (info) cusparseDestroyPruneInfo(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu deleted file mode 100644 index cc938d7da4..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu +++ /dev/null @@ -1,302 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - pruneInfo_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int m = 4; - const int n = 4; - const int nnzA = 9; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - - const int csrRowPtrA[m + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float percentage = 20; /* remove 20% of nonzeros */ - - printf("example of pruneCsr2csrByPercentage \n"); - - printf("prune %.1f percent of nonzeros \n", percentage); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = 
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // TODO: status = hipsparseCreatePruneInfo(&info); - status = cusparseCreatePruneInfo(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrA); - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* C is base-0 */ - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - - printCsr(m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, "A"); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == 
cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneCsr2csrByPercentage_bufferSizeExt( - status = cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - 
printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneCsr2csrNnzByPercentage( - status = cusparseSpruneCsr2csrNnzByPercentage( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneCsr2csrByPercentage( - status = cusparseSpruneCsr2csrByPercentage( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = 
cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) 
hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // TODO: if (info) hipsparseDestroyPruneInfo(info); - if (info) cusparseDestroyPruneInfo(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu deleted file mode 100644 index 2c826935ef..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu +++ /dev/null @@ -1,413 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *dl, - const float *d, - const float *du, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 3; - const int batchSize = 2; - /* - * | 1 6 0 | | 1 | | -0.603960 | - * A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 | - * | 0 5 3 | | 3 | | 0.554455 | - * - * | 8 13 0 | | 4 | | -0.063291 | - * A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 | - * | 0 12 10 | | 6 | | 0.184031 | - */ - - /* - * A = (dl, d, du), B and X are in aggregate format - */ - const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 }; - const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 }; - const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_dl0, d_d0, d_du0) is aggregate format - * (d_dl, d_d, d_du) is interleaved format - */ - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - /* - * algo = 0: cuThomas (unstable) - * algo = 1: LU with pivoting (stable) - * algo = 2: QR (stable) - */ - const int algo = 2; - - const float h_one = 1; - const float h_zero = 0; - - printf("example of gtsv (interleaved format) \n"); - printf("choose algo = 0,1,2 to select different algorithms \n"); - printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, 
algo); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - 
// CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: 
assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt( - status = cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch( - status = cusparseSgtsvInterleavedBatch( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: 
HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - dl, - d, - du, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - dl + n, - d + n, - du + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // 
CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu deleted file mode 100644 index 3bcbd96bb0..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu +++ /dev/null @@ -1,414 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *dl, - const float *d, - const float *du, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 3; - const int batchSize = 2; - /* - * | 1 6 0 | | 1 | | -0.603960 | - * A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 | - * | 0 5 3 | | 3 | | 0.554455 | - * - * | 8 13 0 | | 4 | | -0.063291 | - * A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 | - * | 0 12 10 | | 6 | | 0.184031 | - */ - - /* - * A = (dl, d, du), B and X are in aggregate format - */ - const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 }; - const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 }; - const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_dl0, d_d0, d_du0) is aggregate format - * (d_dl, d_d, d_du) is interleaved format - */ - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - /* - * algo = 0: cuThomas (unstable) - * algo = 1: LU with pivoting (stable) - * algo = 2: QR (stable) - */ - const int algo = 2; - - const float h_one = 1; - const float h_zero = 0; - - printf("example of gtsv (interleaved format) \n"); - printf("choose algo = 0,1,2 to select different algorithms \n"); - printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, 
algo); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - 
// CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't care */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: 
assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt( - status = cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch( - status = cusparseSgtsvInterleavedBatch( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: 
HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - dl, - d, - du, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - dl + n, - d + n, - du + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // 
CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu deleted file mode 100644 index 2ab2d605de..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu +++ /dev/null @@ -1,507 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *ds, - const float *dl, - const float *d, - const float *du, - const float *dw, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 1) { - dot += ds[i] * x[i - 2]; - } - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - if (i < (n - 2)) { - dot += dw[i] * x[i + 2]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 4; - const int batchSize = 2; - - /* - * | 1 8 13 0 | | 1 | | -0.0592 | - * A1 =| 5 2 9 14 |, b1 = | 2 |, x1 = | 0.3428 | - * | 11 6 3 10 | | 3 | | -0.1295 | - * | 0 12 7 4 | | 4 | | 0.1982 | - * - * | 15 22 27 0 | | 5 | | -0.0012 | - * A2 =| 19 16 23 28 |, b2 = | 6 |, x2 = | 0.2792 | - * | 25 20 17 24 | | 7 | | -0.0416 | - * | 0 26 21 18 | | 8 | | 0.0898 | - */ - - /* - * A = (ds, dl, d, du, dw), B and X are in aggregate format - */ - const float ds[n * batchSize] = { 0, 0, 11, 12, 0, 0, 25, 26 }; - const float dl[n * batchSize] = { 0, 5, 6, 7, 0, 19, 20, 21 }; - const float d[n * batchSize] = { 1, 2, 3, 4, 15, 16, 17, 18 }; - const float du[n * batchSize] = { 8, 9, 10, 0, 22, 23, 24, 0 }; - const float dw[n * batchSize] = { 13,14, 0, 0, 27, 28, 0, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_ds0, d_dl0, d_d0, d_du0, d_dw0) is aggregate format - * (d_ds, d_dl, d_d, d_du, d_dw) is interleaved format - */ - float *d_ds0 = NULL; - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dw0 = NULL; - float *d_ds = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_dw = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - 
const float h_one = 1; - const float h_zero = 0; - - int algo = 0; /* QR factorization */ - - printf("example of gpsv (interleaved format) \n"); - printf("n = %d, batchSize = %d\n", n, batchSize); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_ds0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_ds0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dw0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dw0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_ds, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_ds, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dw, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dw, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert ds to interleaved format - * ds = transpose(ds0) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - 
CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of ds */ - n, /* number of columns of ds */ - &h_one, - d_ds0, /* ds0 is n-by-batchSize */ - n, /* leading dimension of ds0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_ds, /* ds is batchSize-by-n */ - batchSize); /* leading dimension of ds */ - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* 
number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert dw to interleaved format - * dw = transpose(dw0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dw */ - n, /* number of columns of dw */ - &h_one, - d_dw0, /* dw0 is n-by-batchSize */ - n, /* leading dimension of dw0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dw, /* dw is batchSize-by-n */ - batchSize /* leading dimension of dw */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgpsvInterleavedBatch_bufferSizeExt( - status = cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_ds, - d_dl, - d_d, - d_du, - d_dw, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - 
assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgpsvInterleavedBatch( - status = cusparseSgpsvInterleavedBatch( - cusparseH, - algo, - n, - d_ds, - d_dl, - d_d, - d_du, - d_dw, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, 
X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - ds, - dl, - d, - du, - dw, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - ds + n, - dl + n, - d + n, - du + n, - dw + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_ds0) hipFree(d_ds0); - if (d_ds0) cudaFree(d_ds0); - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dw0) hipFree(d_dw0); - if (d_dw0) cudaFree(d_dw0); - // CHECK: if (d_ds) hipFree(d_ds); - if (d_ds) cudaFree(d_ds); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_dw) hipFree(d_dw); - if (d_dw) cudaFree(d_dw); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu deleted file mode 100644 index 868f3be69e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu +++ /dev/null @@ -1,327 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// 
CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* compute | b - A*x|_inf */ -void residaul_eval( - int n, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrVal, - const int *csrRowPtr, - const int *csrColInd, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1; - // CHECK: const int lower = (HIPSPARSE_FILL_MODE_LOWER == hipsparseGetMatFillMode(descrA)) ? 1 : 0; - const int lower = (CUSPARSE_FILL_MODE_LOWER == cusparseGetMatFillMode(descrA)) ? 1 : 0; - // CHECK: const int unit = (HIPSPARSE_DIAG_TYPE_UNIT == hipsparseGetMatDiagType(descrA)) ? 1 : 0; - const int unit = (CUSPARSE_DIAG_TYPE_UNIT == cusparseGetMatDiagType(descrA)) ? 1 : 0; - - float r_nrminf = 0; - for (int row = 0; row < n; row++) { - const int start = csrRowPtr[row] - base; - const int end = csrRowPtr[row + 1] - base; - float dot = 0; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColInd[colidx] - base; - float Aij = csrVal[colidx]; - float xj = x[col]; - if ((row == col) && unit) { - Aij = 1.0; - } - int valid = (row >= col) && lower || - (row <= col) && !lower; - if (valid) { - dot += Aij * xj; - } - } - float ri = b[row] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // NOTE: CUDA 10.0 - // TODO: csrsm2Info_t info = NULL; - csrsm2Info_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int nrhs = 2; - const int n = 4; - const int nnzA = 9; - // CHECK: const hipsparseSolvePolicy_t policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL; - const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_NO_LEVEL; - const float h_one = 1.0; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - * Regard A as a lower triangle matrix L with non-unit diagonal. 
- * | 1 5 | | 1 5 | - * Given B = | 2 6 |, X = L \ B = | 0.5 1.5 | - * | 3 7 | | -0.3333 -3 | - * | 4 8 | | 0 -0.4444 | - */ - const int csrRowPtrA[n + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - const float B[n*nrhs] = { 1,2,3,4,5,6,7,8 }; - float X[n*nrhs]; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - float *d_B = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - const int algo = 0; /* non-block version */ - - printf("example of csrsm2 \n"); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - // NOTE: CUDA 10.0 - // TODO: status = hipsparseCreateCsrsm2Info(&info); - status = cusparseCreateCsrsm2Info(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix A */ - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - /* A is lower triangle */ - // CHECK: hipsparseSetMatFillMode(descrA, 
HIPSPARSE_FILL_MODE_LOWER); - cusparseSetMatFillMode(descrA, CUSPARSE_FILL_MODE_LOWER); - /* A has non unit diagonal */ - // CHECK: hipsparseSetMatDiagType(descrA, HIPSPARSE_DIAG_TYPE_NON_UNIT); - cusparseSetMatDiagType(descrA, CUSPARSE_DIAG_TYPE_NON_UNIT); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*nrhs); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*nrhs); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 
= hipMemcpy(d_B, B, sizeof(float)*n*nrhs, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*nrhs, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_bufferSizeExt( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_bufferSizeExt( - handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: analysis */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_analysis( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_analysis( - handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* step 5: solve L * X = B */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_solve( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_solve( 
- handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6:measure residual B - A*X */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*nrhs, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*nrhs, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - float r1_nrminf; - residaul_eval( - n, - descrA, - csrValA, - csrRowPtrA, - csrColIndA, - B, - X, - &r1_nrminf - ); - printf("|b1 - A*x1| = %E\n", r1_nrminf); - - printf("==== x2 = inv(A)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - float r2_nrminf; - residaul_eval( - n, - descrA, - csrValA, - csrRowPtrA, - csrColIndA, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) 
hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // NOTE: CUDA 10.0 - // TODO: if (info) hipsparseDestroyCsrsm2Info(info); - if (info) cusparseDestroyCsrsm2Info(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu deleted file mode 100644 index e6a2178053..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu +++ /dev/null @@ -1,410 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -// CHECK: #include -// CHECK: #include -#include -#include -#include -#include -#include -#include - -#define Min(x,y) ((x)<(y)?(x):(y)) -#define Max(x,y) ((x)>(y)?(x):(y)) -#define Abs(x) ((x)>(0)?(x):-(x)) -// CHECK: static void CudaCheckCore(hipError_t code, const char *file, int line) { -static void CudaCheckCore(cudaError_t code, const char *file, int line) { - // CHECK: if (code != hipSuccess) { - if (code != cudaSuccess) { - // CHECK: fprintf(stderr,"Cuda Error %d : %s %s %d\n", code, hipGetErrorString(code), file, line); - fprintf(stderr,"Cuda Error %d : %s %s %d\n", code, cudaGetErrorString(code), file, line); - exit(code); - } -} - -#define CudaCheck( test ) { CudaCheckCore((test), __FILE__, __LINE__); } -// CHECK: #define CudaCheckAfterCall() { CudaCheckCore((hipGetLastError()), __FILE__, __LINE__); } -#define CudaCheckAfterCall() { CudaCheckCore((cudaGetLastError()), __FILE__, __LINE__); } - -// CHECK: static const char * GetErrorString(hipsparseStatus_t error) { -static const char * GetErrorString(cusparseStatus_t error) { - switch (error) { - // CHECK: case HIPSPARSE_STATUS_SUCCESS: - case CUSPARSE_STATUS_SUCCESS: - return "The operation completed successfully."; - // CHECK: case HIPSPARSE_STATUS_NOT_INITIALIZED: - case CUSPARSE_STATUS_NOT_INITIALIZED: - return "The cuSPARSE library was not initialized. 
This is usually caused by the lack of a prior call, an error in the CUDA Runtime API called by the cuSPARSE routine, or an error in the hardware setup.\n" \ - "To correct: call cusparseCreate() prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed."; - // CHECK: case HIPSPARSE_STATUS_ALLOC_FAILED: - case CUSPARSE_STATUS_ALLOC_FAILED: - return "Resource allocation failed inside the cuSPARSE library. This is usually caused by a cudaMalloc() failure.\n"\ - "To correct: prior to the function call, deallocate previously allocated memory as much as possible."; - // CHECK: case HIPSPARSE_STATUS_INVALID_VALUE: - case CUSPARSE_STATUS_INVALID_VALUE: - return "An unsupported value or parameter was passed to the function (a negative vector size, for example).\n"\ - "To correct: ensure that all the parameters being passed have valid values."; - // CHECK: case HIPSPARSE_STATUS_ARCH_MISMATCH: - case CUSPARSE_STATUS_ARCH_MISMATCH: - return "The function requires a feature absent from the device architecture; usually caused by the lack of support for atomic operations or double precision.\n"\ - "To correct: compile and run the application on a device with appropriate compute capability, which is 1.1 for 32-bit atomic operations and 1.3 for double precision."; - // CHECK: case HIPSPARSE_STATUS_MAPPING_ERROR: - case CUSPARSE_STATUS_MAPPING_ERROR: - return "An access to GPU memory space failed, which is usually caused by a failure to bind a texture.\n"\ - "To correct: prior to the function call, unbind any previously bound textures."; - // CHECK: case HIPSPARSE_STATUS_EXECUTION_FAILED: - case CUSPARSE_STATUS_EXECUTION_FAILED: - return "The GPU program failed to execute. 
This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.\n"\ - "To correct: check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed."; - // CHECK: case HIPSPARSE_STATUS_INTERNAL_ERROR: - case CUSPARSE_STATUS_INTERNAL_ERROR: - return "An internal cuSPARSE operation failed. This error is usually caused by a cudaMemcpyAsync() failure.\n"\ - "To correct: check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine’s completion."; - // CHECK: case HIPSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - // CHECK: "To correct: check that the fields in hipsparseMatDescr_t descrA were set correctly."; - case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.\n"\ - "To correct: check that the fields in cusparseMatDescr_t descrA were set correctly."; - } - return ""; -} - -// CHECK: static void CudaSparseCheckCore(hipsparseStatus_t code, const char *file, int line) { -static void CudaSparseCheckCore(cusparseStatus_t code, const char *file, int line) { - // CHECK: if (code != HIPSPARSE_STATUS_SUCCESS) { - if (code != CUSPARSE_STATUS_SUCCESS) { - fprintf(stderr,"Cuda Error %d : %s %s %d\n", code, GetErrorString(code), file, line); - exit(code); - } -} - -#define CudaSparseCheck( test ) { CudaSparseCheckCore((test), __FILE__, __LINE__); } - -// Alloc and copy -template -ObjectType* allocAndCopy(const ObjectType src[], const int size) { - ObjectType* dest = NULL; - // CHECK: CudaCheck( hipMalloc(&dest,size*sizeof(ObjectType)) ); - CudaCheck( cudaMalloc(&dest,size*sizeof(ObjectType)) ); - // CHECK: CudaCheck( hipMemcpy(dest, src, size*sizeof(ObjectType), hipMemcpyHostToDevice ) ); - CudaCheck( 
cudaMemcpy(dest, src, size*sizeof(ObjectType), cudaMemcpyHostToDevice ) ); - return dest; -} - -template -ObjectType* alloc(const int size) { - ObjectType* dest = NULL; - // CHECK: CudaCheck( hipMalloc(&dest,size*sizeof(ObjectType)) ); - CudaCheck( cudaMalloc(&dest,size*sizeof(ObjectType)) ); - return dest; -} - -template -ObjectType* allocAndCopyPart(const ObjectType src[], const int size, const int allocSize) { - ObjectType* dest = NULL; - assert(size <= allocSize); - // CHECK: CudaCheck( hipMalloc(&dest,allocSize*sizeof(ObjectType)) ); - // CHECK: CudaCheck( hipMemcpy(dest, src, size*sizeof(ObjectType), hipMemcpyHostToDevice ) ); - // CHECK: CudaCheck( hipMemset(&dest[size],0,(allocSize-size)*sizeof(ObjectType)) ); - CudaCheck( cudaMalloc(&dest,allocSize*sizeof(ObjectType)) ); - CudaCheck( cudaMemcpy(dest, src, size*sizeof(ObjectType), cudaMemcpyHostToDevice ) ); - CudaCheck( cudaMemset(&dest[size],0,(allocSize-size)*sizeof(ObjectType)) ); - return dest; -} - -// COO part -#include - -struct Ijv { - int i, j; - double v; -}; - -bool IjvComp(const Ijv& v1, const Ijv& v2) { - return v1.i < v2.i || (v1.i == v2.i && v1.j < v2.j); -} - -struct COOArrays { - int m; - int nnz; - double *val;/*values(NNZ)*/ - int *rowind;/*i(NNZ)*/ - int *colind;/*j(NNZ)*/ - - COOArrays() { - val = NULL; - rowind = NULL; - colind = NULL; - } - - ~COOArrays() { - delete[] val; - delete[] rowind; - delete[] colind; - } - - void sortToRowMajor() { - Ijv* ijvs = new Ijv[nnz]; - for(int idxCopy = 0 ; idxCopy < nnz ; ++idxCopy){ - ijvs[idxCopy].i = rowind[idxCopy]; - ijvs[idxCopy].j = colind[idxCopy]; - ijvs[idxCopy].v = val[idxCopy]; - } - std::sort(ijvs, ijvs+nnz, IjvComp); - for(int idxCopy = 0 ; idxCopy < nnz ; ++idxCopy){ - rowind[idxCopy] = ijvs[idxCopy].i; - colind[idxCopy] = ijvs[idxCopy].j; - val[idxCopy] = ijvs[idxCopy].v; - } - delete[] ijvs; - } -}; - -void compute_COO(COOArrays& coo, double *x , double *y ) { - for(int idxVal = 0 ; idxVal < coo.nnz ; ++idxVal){ - 
y[coo.rowind[idxVal]] += x[coo.colind[idxVal]] * coo.val[idxVal]; - } -} - -// COO part -struct CRSArrays { - int m; //< the dim of the matrix - int nnz;//< the number of nnz (== ia[m]) - double *cu_csrValA; //< the values (of size NNZ) - int *cu_csrRowPtrA;//< the usual rowptr (of size m+1) - int *cu_csrColIndA;//< the colidx of each NNZ (of size nnz) - // CHECK: hipStream_t streamId; - // CHECK: hipsparseHandle_t cusparseHandle; - cudaStream_t streamId; - cusparseHandle_t cusparseHandle; - - CRSArrays() { - cu_csrValA = NULL; - cu_csrRowPtrA = NULL; - cu_csrColIndA = NULL; - // Create sparse handle (needed to call sparse functions - streamId = 0; - // CHECK-NOT: hipsparseHandle = 0; - cusparseHandle = 0; - // CHECK: CudaSparseCheck(hipsparseCreate(&cusparseHandle)); - // CHECK: CudaSparseCheck(hipsparseSetStream(cusparseHandle, streamId)); - CudaSparseCheck(cusparseCreate(&cusparseHandle)); - CudaSparseCheck(cusparseSetStream(cusparseHandle, streamId)); - } - - ~CRSArrays() { - // CHECK: CudaCheck(hipFree(cu_csrValA)); - // CHECK: CudaCheck(hipFree(cu_csrRowPtrA)); - // CHECK: CudaCheck(hipFree(cu_csrColIndA)); - CudaCheck(cudaFree(cu_csrValA)); - CudaCheck(cudaFree(cu_csrRowPtrA)); - CudaCheck(cudaFree(cu_csrColIndA)); - // Destroy sparse handle - // CHECK: CudaSparseCheck(hipsparseDestroy(cusparseHandle)); - CudaSparseCheck(cusparseDestroy(cusparseHandle)); - } -}; - -void COO_to_CRS(COOArrays& coo, CRSArrays* crs) { - // We need COO to be sorted by row (and column) - coo.sortToRowMajor(); - crs->m = coo.m; - crs->nnz = coo.nnz; - // Convert COO to CSR (it is just for the rows idx) - crs->cu_csrRowPtrA = alloc(coo.m+1); - { - int* cu_cooRowIndA = allocAndCopy(coo.rowind, coo.nnz); - // CHECK: CudaSparseCheck(hipsparseXcoo2csr(crs->cusparseHandle, cu_cooRowIndA, - // CHECK: coo.nnz, coo.m, crs->cu_csrRowPtrA, HIPSPARSE_INDEX_BASE_ZERO)); - CudaSparseCheck(cusparseXcoo2csr(crs->cusparseHandle, cu_cooRowIndA, - coo.nnz, coo.m, crs->cu_csrRowPtrA, 
CUSPARSE_INDEX_BASE_ZERO)); - // CHECK: CudaCheck(hipFree(cu_cooRowIndA)); - CudaCheck(cudaFree(cu_cooRowIndA)); - } - // Copy cols idx and values that are unchanged - crs->cu_csrValA = allocAndCopy(coo.val, coo.nnz); - crs->cu_csrColIndA = allocAndCopy(coo.colind, coo.nnz); -} - -double compute_CRS( CRSArrays& crs, double *x , double *y) { - // For blas 2 gemv y = alpha.x.A + Beta.y - const double alpha = 1.0; - const double beta = 0.0; - // Copy input - double* cu_x = allocAndCopy(x, crs.m); - double* cu_y = allocAndCopy(y, crs.m); - // Init matrix properties - // CHECK: hipsparseMatDescr_t descr = 0; - cusparseMatDescr_t descr = 0; - // CHECK: CudaSparseCheck(hipsparseCreateMatDescr(&descr)); - CudaSparseCheck(cusparseCreateMatDescr(&descr)); - // CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - // Compute gemv - float gemvComputeTume = 0; - { - // CHECK: hipEvent_t startTime, stopTime; - // CHECK: hipEventCreate(&startTime); - // CHECK: hipEventCreate(&stopTime); - // CHECK: hipEventRecord(startTime, crs.streamId); - cudaEvent_t startTime, stopTime; - cudaEventCreate(&startTime); - cudaEventCreate(&stopTime); - cudaEventRecord(startTime, crs.streamId); - // CHECK: CudaSparseCheck(hipsparseDcsrmv(crs.cusparseHandle, HIPSPARSE_OPERATION_NON_TRANSPOSE, - CudaSparseCheck(cusparseDcsrmv(crs.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, - crs.m, crs.m, crs.nnz, &alpha, - descr, crs.cu_csrValA, crs.cu_csrRowPtrA, - crs.cu_csrColIndA, cu_x, &beta, cu_y)); - // CHECK: hipEventRecord(stopTime, crs.streamId); - // CHECK: hipEventSynchronize(stopTime); - // CHECK: hipEventElapsedTime(&gemvComputeTume, startTime, stopTime); - cudaEventRecord(stopTime, crs.streamId); - cudaEventSynchronize(stopTime); - cudaEventElapsedTime(&gemvComputeTume, startTime, stopTime); - 
gemvComputeTume /=1000.0; - } - // Get back result - // CHECK: CudaCheck( hipMemcpy(y, cu_y, crs.m*sizeof(double), hipMemcpyDeviceToHost ) ); - CudaCheck( cudaMemcpy(y, cu_y, crs.m*sizeof(double), cudaMemcpyDeviceToHost ) ); - // Dealloc vectors - // CHECK: CudaCheck(hipFree(cu_x)); - // CHECK: CudaCheck(hipFree(cu_y)); - CudaCheck(cudaFree(cu_x)); - CudaCheck(cudaFree(cu_y)); - return gemvComputeTume; -} - -// BCSR part -struct BCRSArrays { - int m; - int nnz; - int nbBlocks; - int nbBlockRow; - int blockSize; - int* cu_bsrRowPtrC; - int* cu_bsrColIndC; - double* cu_bsrValC; - // CHECK: hipStream_t streamId; - cudaStream_t streamId; - // CHECK: hipsparseHandle_t cusparseHandle; - cusparseHandle_t cusparseHandle; - - BCRSArrays() { - cu_bsrRowPtrC = NULL; - cu_bsrColIndC = NULL; - cu_bsrValC = NULL; - // Create sparse handle (needed to call sparse functions - streamId = 0; - // CHECK: CudaSparseCheck(hipsparseCreate(&cusparseHandle)); - // CHECK: CudaSparseCheck(hipsparseSetStream(cusparseHandle, streamId)); - CudaSparseCheck(cusparseCreate(&cusparseHandle)); - CudaSparseCheck(cusparseSetStream(cusparseHandle, streamId)); - } - - ~BCRSArrays() { - // CHECK: CudaCheck(hipFree(cu_bsrRowPtrC)); - // CHECK: CudaCheck(hipFree(cu_bsrColIndC)); - // CHECK: CudaCheck(hipFree(cu_bsrValC)); - CudaCheck(cudaFree(cu_bsrRowPtrC)); - CudaCheck(cudaFree(cu_bsrColIndC)); - CudaCheck(cudaFree(cu_bsrValC)); - // Destroy sparse handle - // CHECK: CudaSparseCheck(hipsparseDestroy(cusparseHandle)); - CudaSparseCheck(cusparseDestroy(cusparseHandle)); - } -}; - -void CRS_to_BCRS(CRSArrays& csr, BCRSArrays* bcrs, const int blockSize) { - bcrs->m = csr.m; - bcrs->nnz = csr.nnz; - bcrs->blockSize = blockSize; - bcrs->nbBlockRow = (csr.m + blockSize-1)/blockSize; - // CHECK: hipMalloc((void**)&bcrs->cu_bsrRowPtrC, sizeof(int) *(bcrs->nbBlockRow+1)); - cudaMalloc((void**)&bcrs->cu_bsrRowPtrC, sizeof(int) *(bcrs->nbBlockRow+1)); - // CHECK: hipsparseMatDescr_t descr = 0; - cusparseMatDescr_t 
descr = 0; - // CHECK: CudaSparseCheck(hipsparseCreateMatDescr(&descr)); - // CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO); - CudaSparseCheck(cusparseCreateMatDescr(&descr)); - cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - int nbNnzBlocks; - // NOTE: cusparseXcsr2bsrNnz and CUSPARSE_DIRECTION_COLUMN (of type cusparseDirection_t) are yet unsupported by HIP - // CHECK-NOT: hipsparseXcsr2bsrNnz(bcrs->cusparseHandle, HIPSPARSE_DIRECTION_COLUMN, csr.m, csr.m, descr, csr.cu_csrRowPtrA, csr.cu_csrColIndA, - cusparseXcsr2bsrNnz(bcrs->cusparseHandle, CUSPARSE_DIRECTION_COLUMN, csr.m, csr.m, descr, csr.cu_csrRowPtrA, csr.cu_csrColIndA, - blockSize, descr, bcrs->cu_bsrRowPtrC, &nbNnzBlocks); - { - int firstBlockIdx, lastBlockIdx; - // CHECK: hipMemcpy(&lastBlockIdx, bcrs->cu_bsrRowPtrC+bcrs->nbBlockRow, sizeof(int), hipMemcpyDeviceToHost); - // CHECK: hipMemcpy(&firstBlockIdx, bcrs->cu_bsrRowPtrC, sizeof(int), hipMemcpyDeviceToHost); - cudaMemcpy(&lastBlockIdx, bcrs->cu_bsrRowPtrC+bcrs->nbBlockRow, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(&firstBlockIdx, bcrs->cu_bsrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost); - assert(firstBlockIdx == 0); // we are in base 0 - assert(nbNnzBlocks == lastBlockIdx - firstBlockIdx); - } - bcrs->nbBlocks = nbNnzBlocks; - // CHECK: CudaCheck(hipMalloc((void**)&bcrs->cu_bsrColIndC, sizeof(int)*nbNnzBlocks)); - // CHECK: CudaCheck(hipMalloc((void**)&bcrs->cu_bsrValC, sizeof(double)*(blockSize*blockSize)*nbNnzBlocks)); - CudaCheck(cudaMalloc((void**)&bcrs->cu_bsrColIndC, sizeof(int)*nbNnzBlocks)); - CudaCheck(cudaMalloc((void**)&bcrs->cu_bsrValC, sizeof(double)*(blockSize*blockSize)*nbNnzBlocks)); - // NOTE: cusparseDcsr2bsr and CUSPARSE_DIRECTION_COLUMN (of type cusparseDirection_t) are yet unsupported by HIP - // CHECK-NOT: hipsparseDcsr2bsr(bcrs->cusparseHandle, 
HIPSPARSE_DIRECTION_COLUMN, - cusparseDcsr2bsr(bcrs->cusparseHandle, CUSPARSE_DIRECTION_COLUMN, - csr.m, csr.m, descr, csr.cu_csrValA, csr.cu_csrRowPtrA, csr.cu_csrColIndA, blockSize, descr, bcrs->cu_bsrValC, bcrs->cu_bsrRowPtrC, bcrs->cu_bsrColIndC); -} - -double compute_BSR(BCRSArrays& bcsr, double *x , double *y){ - // For blas 2 gemv y = alpha.x.A + Beta.y - const double alpha = 1.0; - const double beta = 0.0; - // Copy input - const int sizeMultipleBlockSize = ((bcsr.m+bcsr.blockSize-1)/bcsr.blockSize)*bcsr.blockSize; - double* cu_x = allocAndCopyPart(x, bcsr.m, sizeMultipleBlockSize); - double* cu_y = allocAndCopyPart(y, bcsr.m, sizeMultipleBlockSize); - // Init matrix properties - // CHECK: hipsparseMatDescr_t descr = 0; - // CHECK: CudaSparseCheck(hipsparseCreateMatDescr(&descr)); - // CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO); - cusparseMatDescr_t descr = 0; - CudaSparseCheck(cusparseCreateMatDescr(&descr)); - cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - // Compute gemv - float gemvComputeTume = 0; - { - // CHECK: hipEvent_t startTime, stopTime; - // CHECK: hipEventCreate(&startTime); - // CHECK: hipEventCreate(&stopTime); - // CHECK: hipEventRecord(startTime, bcsr.streamId); - cudaEvent_t startTime, stopTime; - cudaEventCreate(&startTime); - cudaEventCreate(&stopTime); - cudaEventRecord(startTime, bcsr.streamId); - // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, HIPSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, - cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, CUSPARSE_OPERATION_NON_TRANSPOSE, - bcsr.nbBlockRow, bcsr.m, bcsr.nbBlocks, &alpha, descr, - bcsr.cu_bsrValC, bcsr.cu_bsrRowPtrC, bcsr.cu_bsrColIndC, bcsr.blockSize, - cu_x, &beta, cu_y); - // CHECK: hipEventRecord(stopTime, bcsr.streamId); - // CHECK: hipEventSynchronize(stopTime); - // CHECK: 
hipEventElapsedTime(&gemvComputeTume, startTime, stopTime); - cudaEventRecord(stopTime, bcsr.streamId); - cudaEventSynchronize(stopTime); - cudaEventElapsedTime(&gemvComputeTume, startTime, stopTime); - gemvComputeTume /=1000.0; - } - // Get back result - // CHECK: CudaCheck( hipMemcpy(y, cu_y, bcsr.m*sizeof(double), hipMemcpyDeviceToHost ) ); - CudaCheck( cudaMemcpy(y, cu_y, bcsr.m*sizeof(double), cudaMemcpyDeviceToHost ) ); - // Dealloc vectors - // CHECK: CudaCheck(hipFree(cu_x)); - // CHECK: CudaCheck(hipFree(cu_y)); - CudaCheck(cudaFree(cu_x)); - CudaCheck(cudaFree(cu_y)); - return gemvComputeTume; -} diff --git a/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu b/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu deleted file mode 100644 index 0d9fa1cf3f..0000000000 --- a/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include - -__global__ void test_0() { - int a = 10; -} - -namespace first { - __global__ void test_1() { - int b = 20; - } - namespace second { - __global__ void test_2() { - int c = 30; - } - } -} - -int main() { - // CHECK: hipLaunchKernelGGL(::test_0, dim3(1), dim3(1), 0, 0); - ::test_0<<<1, 1>>>(); - // CHECK: hipLaunchKernelGGL(first::test_1, dim3(1), dim3(1), 0, 0); - first::test_1<<<1, 1>>>(); - // CHECK: hipLaunchKernelGGL(first::second::test_2, dim3(1), dim3(1), 0, 0); - first::second::test_2<<<1, 1>>>(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu deleted file mode 100644 index 51bfeb6017..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -// CHECK: #include - -#include - -__global__ void axpy_kernel(float a, float* x, 
float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 1>>> (a, y, x); - axpy_kernel <<<1, 1>>> (a, y, x); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x); - axpy_kernel <<<1, 2>>> (a, y, x); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu deleted file mode 100644 index 310d896054..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -// CHECK: #include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { -float* y_new = nullptr; -#ifdef SOME_MACRO - y_new = x; - // CHECK: axpy_kernel <<<1, 1>>> (a, y_new, x); - axpy_kernel <<<1, 1>>> (a, y_new, x); -#endif - -#ifndef SOME_MACRO - y_new = y; - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); - axpy_kernel <<<1, 2>>> (a, y_new, x); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 5>>> (a, y, x); - axpy_kernel <<<1, 5>>> (a, y, x); -#elif defined SOME_MACRO_1 - // CHECK: axpy_kernel <<<1, 6>>> (a, x, y); - axpy_kernel <<<1, 6>>> (a, x, y); -#else - // CHECK: 
hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y); - axpy_kernel <<<1, 7>>> (a, x, y); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); - axpy_kernel <<<1, 8>>> (a, y, x); -#elif !defined(SOME_MACRO_1) - // CHECK: axpy_kernel <<<1, 9>>> (a, x, y); - axpy_kernel <<<1, 9>>> (a, x, y); -#else - // CHECK: axpy_kernel <<<1, 10>>> (a, x, y); - axpy_kernel <<<1, 10>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu deleted file mode 100644 index a5c7c41745..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { -float* y_new = nullptr; -#ifdef SOME_MACRO - y_new = x; - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y_new, x); - axpy_kernel <<<1, 1>>> (a, y_new, x); -#endif - -#ifndef SOME_MACRO - y_new = y; - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); - axpy_kernel <<<1, 2>>> (a, y_new, x); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(5), 0, 0, a, y, x); - axpy_kernel <<<1, 5>>> (a, y, x); -#elif defined SOME_MACRO_1 - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(6), 0, 0, a, x, y); - axpy_kernel <<<1, 6>>> (a, x, y); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 
0, 0, a, x, y); - axpy_kernel <<<1, 7>>> (a, x, y); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); - axpy_kernel <<<1, 8>>> (a, y, x); -#elif !defined(SOME_MACRO_1) - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(9), 0, 0, a, x, y); - axpy_kernel <<<1, 9>>> (a, x, y); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(10), 0, 0, a, x, y); - axpy_kernel <<<1, 10>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu deleted file mode 100644 index 06ce48ebef..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include - -#include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y, x); - axpy_kernel <<<1, 1>>> (a, y, x); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x); - axpy_kernel <<<1, 2>>> (a, y, x); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp deleted file mode 100644 index 2bf196c0cd..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp +++ /dev/null @@ -1,130 
+0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include - -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - int i; - int errors; - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Lauching kernel from host - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / 
THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>>(gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Memory transfer from device to host - // CHECK: hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp deleted file mode 100644 index d5dffd0b09..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// CHECK: #include -#include -// CHECK-NOT: #include -#include - -// CHECK: extern texture tex; -extern texture tex; - -extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - outputData[y * width + x] = tex2D(tex, x, y); -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp deleted file mode 100644 index e7953dc862..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include -#include -#include -#include - -#define fileName "tex2dKernel.code" -// CHECK: texture tex; -texture tex; -bool testResult = false; - -// CHECK: hipError_t status = cmd; -// CHECK: if (status != hipSuccess) { -// CHECK: std::cout << "error: #" << status << " (" << hipGetErrorString(status) -#define CUDACHECK(cmd) \ - { \ - cudaError_t status = cmd; \ - if (status != cudaSuccess) { \ - std::cout << "error: #" << status << " (" << cudaGetErrorString(status) \ - << ") at line:" << __LINE__ << ": " << #cmd << std::endl; \ - abort(); \ - } \ - } - -bool runTest(int argc, char** argv) { - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - // CHECK: hipModule_t Module; - CUmodule Module; - // CHECK: hipModuleLoad(&Module, fileName); - cuModuleLoad(&Module, fileName); - - // CHECK: hipArray * array; - CUarray array; - // CHECK: HIP_ARRAY_DESCRIPTOR desc; - CUDA_ARRAY_DESCRIPTOR desc; - // CHECK: desc.Format = HIP_AD_FORMAT_FLOAT; - desc.Format = CU_AD_FORMAT_FLOAT; - desc.NumChannels = 1; - desc.Width = width; - desc.Height = height; - // CHECK: hipArrayCreate(&array, &desc); - cuArrayCreate(&array, &desc); - - // CHECK: hip_Memcpy2D copyParam; - CUDA_MEMCPY2D copyParam; - memset(©Param, 0, sizeof(copyParam)); - // CHECK: copyParam.dstMemoryType = hipMemoryTypeArray; - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = array; - // CHECK: copyParam.srcMemoryType = hipMemoryTypeHost; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = hData; - copyParam.srcPitch = width * sizeof(float); - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = height; - // CHECK: hipMemcpyParam2D(©Param); - cuMemcpy2D(©Param); - - // CHECK: textureReference* texref; - 
CUtexref_st* texref; - // CHECK: hipModuleGetTexRef(&texref, Module, "tex"); - cuModuleGetTexRef(&texref, Module, "tex"); - // CHECK: hipTexRefSetAddressMode(texref, 0, hipAddressModeWrap); - cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP); - // CHECK: hipTexRefSetAddressMode(texref, 1, hipAddressModeWrap); - cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP); - // CHECK: hipTexRefSetFilterMode(texref, hipFilterModePoint); - cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT); - // CHECK: hipTexRefSetFlags(texref, 0); - cuTexRefSetFlags(texref, 0); - // CHECK: hipTexRefSetFormat(texref, HIP_AD_FORMAT_FLOAT, 1); - cuTexRefSetFormat(texref, CU_AD_FORMAT_FLOAT, 1); - // CHECK: hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT); - cuTexRefSetArray(texref, array, CU_TRSA_OVERRIDE_FORMAT); - - float* dData = NULL; - // CHECK: hipMalloc((void**)&dData, size); - cudaMalloc((void**)&dData, size); - - struct { - void* _Ad; - unsigned int _Bd; - unsigned int _Cd; - } args; - args._Ad = (void*) dData; - args._Bd = width; - args._Cd = height; - - size_t sizeTemp = sizeof(args); - - // CHECK: void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, - // CHECK: &sizeTemp, HIP_LAUNCH_PARAM_END}; - void* config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &args, CU_LAUNCH_PARAM_BUFFER_SIZE, - &sizeTemp, CU_LAUNCH_PARAM_END}; - - // CHECK: hipFunction_t Function; - CUfunction Function; - // CHECK: hipModuleGetFunction(&Function, Module, "tex2dKernel"); - cuModuleGetFunction(&Function, Module, "tex2dKernel"); - - int temp1 = width / 16; - int temp2 = height / 16; - // CHECK: hipModuleLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL, (void**)&config); - cuLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL, (void**)&config); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - // CHECK: hipMemcpy(hOutputData, dData, 
size, hipMemcpyDeviceToHost); - cudaMemcpy(hOutputData, dData, size, cudaMemcpyDeviceToHost); - - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - // CHECK: hipFree(dData); - cudaFree(dData); - // CHECK: hipFreeArray(hipArray_t(array)); - cudaFreeArray(cudaArray_t(array)); - return true; -} - -int main(int argc, char** argv) { - // CHECK: hipInit(0); - cuInit(0); - testResult = runTest(argc, argv); - printf("%s ...\n", testResult ? "PASSED" : "FAILED"); - exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp deleted file mode 100644 index d4277e133c..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// CHECK: #include "hip/hip_runtime.h" -#include "cuda_runtime.h" -#include -#define NUM 1000000 - -// CHECK: if (status != hipSuccess) { -#define CUDA_CHECK(status) \ - if (status != cudaSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - -// Device (Kernel) function -__global__ void multiply(float* C, float* A, float* B, int N) { - int tx = blockDim.x*blockIdx.x+threadIdx.x; - if (tx < N) { - C[tx] = A[tx] * B[tx]; - } -} - -// CPU implementation -void multiplyCPU(float* C, float* A, float* B, int N) { - for(unsigned int i=0; i>> (C, A, B, NUM); - - // Record the stop event - // CHECK: CUDA_CHECK(hipEventRecord(stop, NULL)); - CUDA_CHECK(cudaEventRecord(stop, NULL)); - // CHECK: CUDA_CHECK(hipEventSynchronize(stop)); - CUDA_CHECK(cudaEventSynchronize(stop)); - - // CHECK: CUDA_CHECK(hipEventElapsedTime(&eventMs, start, stop)); - CUDA_CHECK(cudaEventElapsedTime(&eventMs, start, stop)); - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Calculate Occupancy - int numBlock = 0; - // CHECK: CUDA_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); - CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); - - if(devProp.maxThreadsPerMultiProcessor) { - std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; - } -} - -int main() { - float *A, *B, *C0, *C1, *cpuC; - float 
*Ad, *Bd, *C0d, *C1d; - int errors=0; - - // Initialize the input data - A = (float*)malloc(NUM * sizeof(float)); - B = (float*)malloc(NUM * sizeof(float)); - C0 = (float*)malloc(NUM * sizeof(float)); - C1 = (float*)malloc(NUM * sizeof(float)); - cpuC = (float*)malloc(NUM * sizeof(float)); - - for(int i=0; i< NUM; i++) { - A[i] = i; - B[i] = i; - } - - // Allocate the memory on the device side - // CHECK: CUDA_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&Ad, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&Bd, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&C0d, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&C1d, NUM * sizeof(float))); - - // Memory transfer from host to device - // CHECK: CUDA_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(Ad,A,NUM * sizeof(float), cudaMemcpyHostToDevice)); - // CHECK: CUDA_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(Bd,B,NUM * sizeof(float), cudaMemcpyHostToDevice)); - - // Kernel launch with manual/default block size - launchKernel(C0d, Ad, Bd, 1); - - // Kernel launch with the block size suggested by cudaOccupancyMaxPotentialBlockSize - launchKernel(C1d, Ad, Bd, 0); - - // Memory transfer from device to host - // CHECK: CUDA_CHECK(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(C0,C0d, NUM * sizeof(float), cudaMemcpyDeviceToHost)); - // CHECK: CUDA_CHECK(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(C1,C1d, NUM * sizeof(float), cudaMemcpyDeviceToHost)); - - // CPU computation - multiplyCPU(cpuC, A, B, NUM); - - // Verify the results - double eps = 1.0E-6; - - for (int i = 0; i < NUM; i++) { 
- if (std::abs(C0[i] - cpuC[i]) > eps) { - errors++; - } - } - - if (errors != 0) { - printf("\nManual Test FAILED: %d errors\n", errors); - errors=0; - } else { - printf("\nManual Test PASSED!\n"); - } - - for (int i = 0; i < NUM; i++) { - if (std::abs(C1[i] - cpuC[i]) > eps) { - errors++; - } - } - - if (errors != 0) { - printf("\n Automatic Test FAILED: %d errors\n", errors); - } else { - printf("\nAutomatic Test PASSED!\n"); - } - - // CHECK: CUDA_CHECK(hipFree(Ad)); - CUDA_CHECK(cudaFree(Ad)); - // CHECK: CUDA_CHECK(hipFree(Bd)); - CUDA_CHECK(cudaFree(Bd)); - // CHECK: CUDA_CHECK(hipFree(C0d)); - CUDA_CHECK(cudaFree(C0d)); - // CHECK: CUDA_CHECK(hipFree(C1d)); - CUDA_CHECK(cudaFree(C1d)); - - free(A); - free(B); - free(C0); - free(C1); - free(cpuC); -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp deleted file mode 100644 index 323be01ff1..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; - // CHECK: hipEventCreate(&start); - cudaEventCreate(&start); - // CHECK: hipEventCreate(&stop); - cudaEventCreate(&stop); - float eventMs = 1.0f; - - int i; - int errors; - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the 
input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Lauching kernel from host - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>>(gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from device to host - // CHECK: 
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp deleted file mode 100644 index 4a1b28001f..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// NOTE: Profiler API is under development. -// NOTE: This is NOT WORKING example. -// TODO: Get rid of HIP_SCOPED_MARKER, HIP_BEGIN_MARKER, HIP_END_MARKER, declared in hip/hip_profile.h or -// TODO: find out a way to hipify it in particular place (signatures are to obtain). 
- -#include - -// CHECK: #include -#include -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -#define ITERATIONS 10 - -// Cmdline parms to control start and stop triggers -int startTriggerIteration = -1; -int stopTriggerIteration = -1; - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -// Use a separate function to demonstrate how to use function name as part of scoped marker: -void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) { - // __func__ is a standard C++ macro which expands to the name of the function, in this case - // "runGPU" -// TODO: Find out signatures to generate the following: -// HIP_SCOPED_MARKER(__func__, "MyGroup"); - - for (int i = 0; i < ITERATIONS; i++) { - if (i == startTriggerIteration) { - // CHECK: hipProfilerStart(); - cudaProfilerStart(); - } - if (i == stopTriggerIteration) { - // CHECK: hipProfilerStop(); - cudaProfilerStop(); - } - - float eventMs = 0.0f; - - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; - // CHECK: hipEventCreate(&start); - cudaEventCreate(&start); - // CHECK: hipEventCreate(&stop); - cudaEventCreate(&stop); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); 
- cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - // CHECK: printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - printf("cudaMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Lauching kernel from host - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>> (gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from device to host - // CHECK: hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - // CHECK: printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - 
printf("cudaMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - } -}; - -int main(int argc, char* argv[]) { - if (argc >= 2) { - startTriggerIteration = atoi(argv[1]); - printf("info : will start tracing at iteration:%d\n", startTriggerIteration); - } - if (argc >= 3) { - stopTriggerIteration = atoi(argv[2]); - printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration); - } - - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - { - // Show example of how to create a "scoped marker". - // The scoped marker records the time spent inside the { scope } of the marker - the begin - // timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE - // exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. This - // marker captures the time spent in setup including host allocation, initialization, and - // device memory allocation. -// TODO: Find out signatures to generate the following: -// HIP_SCOPED_MARKER("Setup", "MyGroup"); - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (int i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // FYI, the scoped-marker will be destroyed here when the scope exits, and will record its - // "end" timestamp. 
- } - - runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix); - - // show how to use explicit begin/end markers: - // We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group: - // The region will stop when HIP_END_MARKER is called - // This is another way to mark begin/end - as an alternative to scoped markers. -// TODO: Find out signatures to generate the following: -// HIP_BEGIN_MARKER("Check&TearDown", "MyGroup"); - - int errors = 0; - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - // This ends the last marker started in this thread, in this case "Check&TearDown" -// TODO: Find out signatures to generate the following: -// HIP_END_MARKER(); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp deleted file mode 100644 index b7fadbf936..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -// CHECK: #include -#include - -#define WIDTH 32 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -using namespace std; - -__global__ void matrixTranspose_static_shared(float* out, float* in, - const int width) { - // CHECK-NOT: HIP_DYNAMIC_SHARED(float, sharedMem); - // CHECK: __shared__ float sharedMem[WIDTH * WIDTH]; - __shared__ float sharedMem[WIDTH * WIDTH]; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - sharedMem[y * width + x] = in[x * width + y]; - - __syncthreads(); - - out[y * width + x] = sharedMem[y * width + x]; -} - -__global__ void matrixTranspose_dynamic_shared(float* out, float* in, - const int width) { - // declare dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(int, sharedMem) - extern __shared__ int sharedMem[]; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - sharedMem[y * width + x] = in[x * width + y]; - - __syncthreads(); - - out[y * width + x] = sharedMem[y * width + x]; -} - -void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix, - float** TransposeMatrix, int width) { - const int num_streams = 2; - // CHECK: hipStream_t streams[num_streams]; - cudaStream_t streams[num_streams]; - - // CHECK: for (int i = 0; i < num_streams; i++) hipStreamCreate(&streams[i]); - for (int i = 0; i < num_streams; i++) cudaStreamCreate(&streams[i]); - - for (int i = 0; i < num_streams; i++) { - // CHECK: hipMalloc((void**)&data[i], NUM * sizeof(float)); - cudaMalloc((void**)&data[i], NUM * sizeof(float)); - // CHECK: hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]); - cudaMemcpyAsync(data[i], randArray, NUM * sizeof(float), cudaMemcpyHostToDevice, streams[i]); - } - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 
dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose_static_shared, dim3(dimGrid), dim3(dimBlock), 0, streams[0], gpuTransposeMatrix[0], data[0], width); - matrixTranspose_static_shared <<>>(gpuTransposeMatrix[0], data[0], width); - // CHECK: hipLaunchKernelGGL(matrixTranspose_dynamic_shared, dim3(dimGrid), dim3(dimBlock), 0, streams[1], gpuTransposeMatrix[1], data[1], width); - matrixTranspose_dynamic_shared <<>>(gpuTransposeMatrix[1], data[1], width); - - for (int i = 0; i < num_streams; i++) - // CHECK: hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), hipMemcpyDeviceToHost, streams[i]); - cudaMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), cudaMemcpyDeviceToHost, streams[i]); -} - -int main() { - // CHECK: hipSetDevice(0); - cudaSetDevice(0); - - float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray; - - int width = WIDTH; - - randArray = (float*)malloc(NUM * sizeof(float)); - - TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float)); - - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - - for (int i = 0; i < NUM; i++) { - randArray[i] = (float)i * 1.0f; - } - - MultipleStream(data, randArray, gpuTransposeMatrix, TransposeMatrix, width); - - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // verify the results - int errors = 0; - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[0][i] - TransposeMatrix[1][i]) > eps) { - printf("%d stream0: %f stream1 %f\n", i, TransposeMatrix[0][i], TransposeMatrix[1][i]); - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - 
printf("stream PASSED!\n"); - } - - free(randArray); - for (int i = 0; i < 2; i++) { - // CHECK: hipFree(data[i]); - cudaFree(data[i]); - // CHECK: hipFree(gpuTransposeMatrix[i]); - cudaFree(gpuTransposeMatrix[i]); - free(TransposeMatrix[i]); - } - - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp deleted file mode 100644 index be2ec7399d..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -// CHECK: #include -#include -#include -#define WIDTH 32 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -using namespace std; - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" - -#define failed(...) \ - printf("%serror: ", KRED); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("error: TEST FAILED\n%s", KNRM); \ - abort(); - -// CHECK: hipError_t localError = error; -// CHECK: if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled) && -// CHECK: (localError != hipErrorPeerAccessNotEnabled )) { -// CHECK: printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), -#define CUDACHECK(error) \ - { \ - cudaError_t localError = error; \ - if ((localError != cudaSuccess) && (localError != cudaErrorPeerAccessAlreadyEnabled) && \ - (localError != cudaErrorPeerAccessNotEnabled )) { \ - printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, cudaGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__, KNRM); \ - failed("API returned error code."); \ - } \ - } - -void checkPeer2PeerSupport() { - int gpuCount; - int canAccessPeer; - // CHECK: CUDACHECK(hipGetDeviceCount(&gpuCount)); - CUDACHECK(cudaGetDeviceCount(&gpuCount)); - for (int currentGpu = 0; currentGpu < gpuCount; currentGpu++) { - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - for (int peerGpu = 0; peerGpu < currentGpu; peerGpu++) { - if (currentGpu != peerGpu) { - // CHECK: CUDACHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu)); - CUDACHECK(cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu)); - printf("currentGpu#%d canAccessPeer: peerGpu#%d=%d\n", currentGpu, peerGpu, - canAccessPeer); - } - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - } - // CHECK: 
CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - } -} - -void enablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - // Must be on a multi-gpu system: - assert(currentGpu != peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - if (canAccessPeer == 1) { - // CHECK: CUDACHECK(hipDeviceEnablePeerAccess(peerGpu, 0)); - CUDACHECK(cudaDeviceEnablePeerAccess(peerGpu, 0)); - } else - printf("peer2peer transfer not possible between the selected gpu devices"); -} - -void disablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - // Must be on a multi-gpu system: - assert(currentGpu != peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - if (canAccessPeer == 1) { - // CHECK: CUDACHECK(hipDeviceDisablePeerAccess(peerGpu)); - CUDACHECK(cudaDeviceDisablePeerAccess(peerGpu)); - } else - printf("peer2peer disable not required"); -} - -__global__ void matrixTranspose_static_shared(float* out, float* in, - const int width) { - // CHECK-NOT: HIP_DYNAMIC_SHARED(float, sharedMem); - // CHECK: __shared__ float sharedMem[WIDTH * WIDTH]; - __shared__ float sharedMem[WIDTH * WIDTH]; - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - sharedMem[y * width + x] = in[x * width + y]; - __syncthreads(); - out[y * width + x] = sharedMem[y * width + x]; -} - -__global__ void matrixTranspose_dynamic_shared(float* out, float* in, - const int width) { - // declare dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(float, 
sharedMem) - extern __shared__ float sharedMem[]; - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - sharedMem[y * width + x] = in[x * width + y]; - __syncthreads(); - out[y * width + x] = sharedMem[y * width + x]; -} - -int main() { - checkPeer2PeerSupport(); - int gpuCount; - int currentGpu, peerGpu; - // CHECK: CUDACHECK(hipGetDeviceCount(&gpuCount)); - CUDACHECK(cudaGetDeviceCount(&gpuCount)); - if (gpuCount < 2) { - printf("Peer2Peer application requires atleast 2 gpu devices"); - return 0; - } - currentGpu = 0; - peerGpu = (currentGpu + 1); - printf("currentGpu=%d peerGpu=%d (Total no. of gpu = %d)\n", currentGpu, peerGpu, gpuCount); - float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray; - int width = WIDTH; - randArray = (float*)malloc(NUM * sizeof(float)); - for (int i = 0; i < NUM; i++) { - randArray[i] = (float)i * 1.0f; - } - enablePeer2Peer(currentGpu, peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&data[0], NUM * sizeof(float)); - cudaMalloc((void**)&data[0], NUM * sizeof(float)); - // CHECK: hipMemcpy(data[0], randArray, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(data[0], randArray, NUM * sizeof(float), cudaMemcpyHostToDevice); - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose_static_shared, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix[0], data[0], width); - matrixTranspose_static_shared <<>>(gpuTransposeMatrix[0], data[0], width); - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - TransposeMatrix[1] = 
(float*)malloc(NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&data[1], NUM * sizeof(float)); - cudaMalloc((void**)&data[1], NUM * sizeof(float)); - // CHECK: hipMemcpy(data[1], gpuTransposeMatrix[0], NUM * sizeof(float), hipMemcpyDeviceToDevice); - cudaMemcpy(data[1], gpuTransposeMatrix[0], NUM * sizeof(float), cudaMemcpyDeviceToDevice); - // CHECK: hipLaunchKernelGGL(matrixTranspose_dynamic_shared, dim3(dimGrid), dim3(dimBlock), sizeof(float) * WIDTH * WIDTH, 0, gpuTransposeMatrix[1], data[1], width); - matrixTranspose_dynamic_shared <<>>(gpuTransposeMatrix[1], data[1], width); - // CHECK: hipMemcpy(TransposeMatrix[1], gpuTransposeMatrix[1], NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix[1], gpuTransposeMatrix[1], NUM * sizeof(float), cudaMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - disablePeer2Peer(currentGpu, peerGpu); - // verify the results - int errors = 0; - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(randArray[i] - TransposeMatrix[1][i]) > eps) { - printf("%d cpu: %f gpu peered data %f\n", i, randArray[i], TransposeMatrix[1][i]); - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("Peer2Peer PASSED!\n"); - } - free(randArray); - for (int i = 0; i < 2; i++) { - // CHECK: hipFree(data[i]); - cudaFree(data[i]); - // CHECK: hipFree(gpuTransposeMatrix[i]); - cudaFree(gpuTransposeMatrix[i]); - free(TransposeMatrix[i]); - } - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - return 0; -} diff --git 
a/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp b/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp deleted file mode 100644 index d9122160b1..0000000000 --- a/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include - -__global__ -void add(int n, float *x, float *y) -{ - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] + y[i]; -} - -int main(int argc, char *argv[]) -{ - int numElements = 10; - bool testResult = true; - float *A, *B; - // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); - cudaMallocManaged(&A, numElements * sizeof(float)); - // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); - cudaMallocManaged(&B, numElements * sizeof(float)); - for (int i = 0; i < numElements; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - int blockSize = 256; - int numBlocks = (numElements + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); - add<<>>(numElements, A, B); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - float maxError = 0.0f; - for (int i = 0; i < numElements; i++) - maxError = fmax(maxError, fabs(B[i]-3.0f)); - // CHECK: hipFree(A); - cudaFree(A); - // CHECK: hipFree(B); - cudaFree(B); - if(maxError == 0.0f) - return 0; - return -1; -} diff --git a/tests/hipify-clang/unit_tests/samples/allocators.cu b/tests/hipify-clang/unit_tests/samples/allocators.cu deleted file mode 100644 index 4d33e315bf..0000000000 --- a/tests/hipify-clang/unit_tests/samples/allocators.cu +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#pragma once -// CHECK: #include -#include -#include - -/** - * Allocate GPU memory for `count` elements of type 
`T`. - */ -template -static T* gpuMalloc(size_t count) { - T* ret = nullptr; - // CHECK: hipMalloc(&ret, count * sizeof(T)); - cudaMalloc(&ret, count * sizeof(T)); - return ret; -} - -template -__global__ void add(int n, T* x, T* y) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] + y[i]; -} - -int main(int argc, char* argv[]) { - size_t numElements = 50; - float *A = gpuMalloc(numElements); - float* B = gpuMalloc(numElements); - for (int i = 0; i < numElements; ++i) { - A[i] = 1.0f; - B[i] = 2.0f; - } - int blockSize = 512; - int numBlocks = (numElements + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); - add<<>>(numElements, A, B); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - float maxError = 0.0f; - for (int i = 0; i < numElements; ++i) - maxError = fmax(maxError, fabs(B[i] - 3.0f)); - // CHECK: hipFree(A); - cudaFree(A); - // CHECK: hipFree(B); - cudaFree(B); - if (maxError == 0.0f) - return 0; - return -1; - } diff --git a/tests/hipify-clang/unit_tests/samples/axpy.cu b/tests/hipify-clang/unit_tests/samples/axpy.cu deleted file mode 100644 index c09c372967..0000000000 --- a/tests/hipify-clang/unit_tests/samples/axpy.cu +++ /dev/null @@ -1,97 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include - -// CHECK: #include -#include - -#define TOKEN_PASTE(X, Y) X ## Y -#define ARG_LIST_AS_MACRO a, device_x, device_y -#define KERNEL_CALL_AS_MACRO axpy<<<1, kDataLen>>> -#define KERNEL_NAME_MACRO axpy - -// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y) -#define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y) - - -template -__global__ void axpy(T a, T *x, T *y) { - y[threadIdx.x] 
= a * x[threadIdx.x]; -} - -__global__ void empty() { -} - -int main(int argc, char* argv[]) { - const int kDataLen = 4; - - float a = 2.0f; - float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; - float host_y[kDataLen]; - - // Copy input data to device. - float* device_x; - float* device_y; - - // CHECK: hipMalloc(&device_x, kDataLen * sizeof(float)); - cudaMalloc(&device_x, kDataLen * sizeof(float)); - -#ifdef HERRING - // CHECK: hipMalloc(&device_y, kDataLen * sizeof(float)); - cudaMalloc(&device_y, kDataLen * sizeof(float)); -#else - // CHECK: hipMalloc(&device_y, kDataLen * sizeof(double)); - cudaMalloc(&device_y, kDataLen * sizeof(double)); -#endif - - // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); - - // Launch the kernel in numerous different strange ways to exercise the prerocessor. - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); - axpy<<<1, kDataLen>>>(a, device_x, device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); - axpy<<<1, kDataLen>>>(a, device_x, device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); - axpy<<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - axpy<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, 
kDataLen>>> ( ); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, kDataLen, 0>>>(); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, kDataLen, 0, 0>>>(); - - // CHECK: COMPLETE_LAUNCH; - COMPLETE_LAUNCH; - - - // Copy output data to host. - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // CHECK: hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); - - // Print the results. - for (int i = 0; i < kDataLen; ++i) { - std::cout << "y[" << i << "] = " << host_y[i] << "\n"; - } - - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/coalescing.cu b/tests/hipify-clang/unit_tests/samples/coalescing.cu deleted file mode 100644 index ec4645d673..0000000000 --- a/tests/hipify-clang/unit_tests/samples/coalescing.cu +++ /dev/null @@ -1,117 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// To measure effects of memory coalescing. Coalescing.cu -// B. 
Wilkinson Jan 30, 2011 - -#include -#include -#include -// CHECK: #include -#include - -#define BlockSize 16 // Size of blocks, 32 x 32 threads, fixed, used globally - -__global__ void gpu_Comput (int *h, int N, int T) { - -// Array loaded with global thread ID that acesses that location - - int col = threadIdx.x + blockDim.x * blockIdx.x; - int row = threadIdx.y + blockDim.y * blockIdx.y; - - int threadID = col + row * N; - int index = row + col * N; // sequentially down each row - - for (int t = 0; t < T; t++) // loop to repeat to reduce other time effects - h[index] = threadID; // load array with flattened global thread ID -} - -void printArray(int *h, int N) { - - printf("Results of computation, every N/8 numbers, eight numbers\n"); - - for (int row = 0; row < N; row += N/8) { - for (int col = 0; col < N; col += N/8) - printf("%6d ", h[col + row * N]); - printf("\n"); - } -} - -int main(int argc, char *argv[]) { - - int T = 100; // number of iterations, entered at keyboard - int B = 1; // number of blocks, entered at keyboard - char key; - - int *h, *dev_h; // ptr to array holding numbers on host and device - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; // cuda events to measure time - float elapsed_time_ms1; - // CHECK: hipEventCreate( &start ); - // CHECK: hipEventCreate( &stop ); - cudaEventCreate( &start ); - cudaEventCreate( &stop ); - -/* ------------------------- Keyboard input -----------------------------------*/ - -do { // loop to repeat complete program - - printf("Grid Structure 2-D grid, 2-D blocks\n"); - printf("Blocks fixed at 16 x 16 threads, 512 threads, max for compute cap. 
1.x\n"); - printf("Enter number of blocks in grid, each dimension, currently %d\n",B); - scanf("%d",&B); - printf("Enter number of iterations, currently %d\n",T); - scanf("%d",&T); - - int N = B * BlockSize; // size of data array, given input data - - printf("Array size (and total grid-block size) %d x %d\n", N, N); - - dim3 Block(BlockSize, BlockSize); //Block structure, 32 x 32 max - dim3 Grid(B, B); //Grid structure, B x B - -/* ------------------------- Allocate Memory-----------------------------------*/ - - int size = N * N * sizeof(int); // number of bytes in total in array - h = (int*) malloc(size); // Array on host - // CHECK: hipMalloc((void**)&dev_h, size); - cudaMalloc((void**)&dev_h, size); // allocate device memory - -/* ------------------------- GPU Computation -----------------------------------*/ - - // CHECK: hipEventRecord( start, 0 ); - cudaEventRecord( start, 0 ); - // CHECK: hipLaunchKernelGGL(gpu_Comput, dim3(Grid), dim3(Block), 0, 0, dev_h, N, T); - gpu_Comput<<< Grid, Block >>>(dev_h, N, T); - // CHECK: hipEventRecord( stop, 0 ); - // CHECK: hipEventSynchronize( stop ); - // CHECK: hipEventElapsedTime( &elapsed_time_ms1, start, stop ); - cudaEventRecord( stop, 0 ); // instrument code to measue end time - cudaEventSynchronize( stop ); // wait for all work done by threads - cudaEventElapsedTime( &elapsed_time_ms1, start, stop ); - // CHECK: hipMemcpy(h,dev_h, size ,hipMemcpyDeviceToHost); - cudaMemcpy(h,dev_h, size ,cudaMemcpyDeviceToHost); //Get results to check - - printArray(h,N); - printf("\nTime to calculate results on GPU: %f ms.\n", elapsed_time_ms1); - -/* -------------------------REPEAT PROGRAM INPUT-----------------------------------*/ - - printf("\nEnter c to repeat, return to terminate\n"); - - scanf("%c",&key); - scanf("%c",&key); - -} while (key == 'c'); // loop of complete program - -/* -------------- clean up ---------------------------------------*/ - -free(h); - // CHECK: hipFree(dev_h); - cudaFree(dev_h); - // CHECK: 
hipEventDestroy(start); - // CHECK: hipEventDestroy(stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/cudaRegister.cu b/tests/hipify-clang/unit_tests/samples/cudaRegister.cu deleted file mode 100644 index 2cc754300a..0000000000 --- a/tests/hipify-clang/unit_tests/samples/cudaRegister.cu +++ /dev/null @@ -1,106 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include -#include - -#define LEN 1024 -#define SIZE LEN * sizeof(float) -#define ITER 1024*1024 - -// CHECK: if(status != hipSuccess) { -#define check(msg, status){ \ -if(status != cudaSuccess) { \ - printf("%s failed. 
\n", #msg); \ -} \ -} - -__global__ void Inc1(float *Ad, float *Bd){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if(tx < 1 ){ - for(int i=0;i>>(Ad, Bd); - A[0] = -(ITER*1.0f); - std::cout<<"Same cache line before completion: \t"<< A[0]<>>(Ad, Bd); - A[0] = -(ITER*1.0f); - std::cout<<"Diff cache line before completion: \t"< -// CHECK: #include -#include - -__global__ void dynamicReverse(int *d, int n) -{ - // Dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(int, s); - extern __shared__ int s[]; - int t = threadIdx.x; - int tr = n-t-1; - s[t] = d[t]; - __syncthreads(); - d[t] = s[tr]; -} - -int main(void) -{ - const int n = 64; - int a[n], r[n], d[n]; - - for (int i = 0; i < n; i++) { - a[i] = i; - r[i] = n-i-1; - d[i] = 0; - } - - int *d_d; - // CHECK: hipMalloc(&d_d, n * sizeof(int)); - cudaMalloc(&d_d, n * sizeof(int)); - // run version with dynamic shared memory - // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); - cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(dynamicReverse, dim3(1), dim3(n), n*sizeof(int), 0, d_d, n); - dynamicReverse<<<1,n,n*sizeof(int)>>>(d_d, n); - // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); - cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); - for (int i = 0; i < n; i++) - if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); -} diff --git a/tests/hipify-clang/unit_tests/samples/intro.cu b/tests/hipify-clang/unit_tests/samples/intro.cu deleted file mode 100644 index 5ae5479aa9..0000000000 --- a/tests/hipify-clang/unit_tests/samples/intro.cu +++ /dev/null @@ -1,174 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -#include -// CHECK: #include -#include - -#define K_THREADS 64 -#define K_INDEX() ((gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x) -#define RND() ((rand() & 0x7FFF) / float(0x8000)) -#define ERRORCHECK() 
cErrorCheck(__FILE__, __LINE__) - -// CHECK: hipEvent_t t##_start, t##_end; \ -// CHECK: hipEventCreate(&t##_start); \ -// CHECK: hipEventCreate(&t##_end); -#define TIMER_CREATE(t) \ - cudaEvent_t t##_start, t##_end; \ - cudaEventCreate(&t##_start); \ - cudaEventCreate(&t##_end); - -// CHECK: hipEventRecord(t##_start); \ -// CHECK: hipEventSynchronize(t##_start); -#define TIMER_START(t) \ - cudaEventRecord(t##_start); \ - cudaEventSynchronize(t##_start); \ - -// CHECK: hipEventRecord(t##_start); \ -// CHECK: hipEventSynchronize(t##_start); \ -// CHECK: hipEventRecord(t##_end); \ -// CHECK: hipEventSynchronize(t##_end); \ -// CHECK: hipEventElapsedTime(&t, t##_start, t##_end); -#define TIMER_END(t) \ - cudaEventRecord(t##_start); \ - cudaEventSynchronize(t##_start); \ - cudaEventRecord(t##_end); \ - cudaEventSynchronize(t##_end); \ - cudaEventElapsedTime(&t, t##_start, t##_end); - - -inline void cErrorCheck(const char *file, int line) { -// CHECK: hipDeviceSynchronize(); -// CHECK: hipError_t err = hipGetLastError(); -// CHECK: if (err != hipSuccess) { -// CHECK: printf("Error: %s\n", hipGetErrorString(err)); - cudaThreadSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("Error: %s\n", cudaGetErrorString(err)); - printf(" @ %s: %d\n", file, line); - exit(-1); - } -} - -inline dim3 K_GRID(int n, int threads = K_THREADS) { - int blocks = (int)ceilf(sqrtf((float)n/threads)); - dim3 grid(blocks, blocks); - return grid; -} - -typedef struct data { - int n; - float4 *r, *v, *f; -} data; - -data cpu, gpu; - -#define N 20 - -__global__ void repulsion(data gpu); -__global__ void integration(data gpu); - - -int main() { - printf("Cuda Test 1\n"); - - int count = 0; - // CHECK: hipGetDeviceCount(&count); - cudaGetDeviceCount(&count); - printf(" %d CUDA devices found\n", count); - if(!count) { - ::exit(EXIT_FAILURE); - } - // CHECK: hipFree(0); - cudaFree(0); - - cpu.n = N; - - cpu.r = (float4*)malloc(N * sizeof(float4)); - cpu.v = 
(float4*)malloc(N * sizeof(float4)); - cpu.f = (float4*)malloc(N * sizeof(float4)); - - for(int i = 0; i < N; ++i) { - cpu.v[i] = make_float4(0,0,0,0); - cpu.r[i] = make_float4(RND(), RND(), RND(), 0); - cpu.f[i] = make_float4(0,0.01,0,0); - } - - gpu = cpu; - // CHECK: hipMalloc(&gpu.r, N * sizeof(float4)); - // CHECK: hipMalloc(&gpu.v, N * sizeof(float4)); - // CHECK: hipMalloc(&gpu.f, N * sizeof(float4)); - cudaMalloc(&gpu.r, N * sizeof(float4)); - cudaMalloc(&gpu.v, N * sizeof(float4)); - cudaMalloc(&gpu.f, N * sizeof(float4)); - // CHECK: hipMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - // CHECK: hipMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - // CHECK: hipMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - cudaMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - cudaMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - cudaMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - - ERRORCHECK(); - float rep; - TIMER_CREATE(rep); - TIMER_START(rep); - // CHECK: hipLaunchKernelGGL(integration, dim3(K_GRID(cpu.n)), dim3(K_THREADS), 0, 0, gpu); - integration <<< K_GRID(cpu.n), K_THREADS >>>(gpu); - - TIMER_END(rep); - printf("Took: %f ms\n", rep); - ERRORCHECK(); - // CHECK: hipMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - // CHECK: hipMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - // CHECK: hipMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - cudaMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - cudaMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - cudaMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - // CHECK: hipHostFree(cpu.r); - // CHECK: hipHostFree(cpu.v); - // CHECK: hipHostFree(cpu.f); - cudaFreeHost(cpu.r); - cudaFreeHost(cpu.v); - cudaFreeHost(cpu.f); - // CHECK: hipFree(gpu.r); - // CHECK: 
hipFree(gpu.v); - // CHECK: hipFree(gpu.f); - cudaFree(gpu.r); - cudaFree(gpu.v); - cudaFree(gpu.f); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - printf("Results: \n"); - for(int i = 0; i < N; ++i) { - printf("%f, %f, %f \n", cpu.r[i].x, cpu.r[i].y, cpu.r[i].z); - } - - printf("Ready...\n"); - return 0; -} - -__global__ void repulsion(data gpu) { - int idx = K_INDEX(); - if(idx < N) { - gpu.r[idx].x = 1; - gpu.r[idx].y = 1; - gpu.r[idx].z = 1; - } -} - -#define MULT4(v, s) v.x *= s; v.y *= s; v.z *= s; v.w *= s; -#define ADD4(v1, v2) v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; v1.w += v2.w; - -__global__ void integration(data gpu) { - int i = K_INDEX(); - if(i < N) { - MULT4(gpu.f[i], 0.01); - MULT4(gpu.v[i], 0.01); - ADD4(gpu.v[i], gpu.f[i]); - ADD4(gpu.r[i], gpu.v[i]); - gpu.f[i] = make_float4(0,0,0,0); - } -} diff --git a/tests/hipify-clang/unit_tests/samples/square.cu b/tests/hipify-clang/unit_tests/samples/square.cu deleted file mode 100644 index b415c15495..0000000000 --- a/tests/hipify-clang/unit_tests/samples/square.cu +++ /dev/null @@ -1,112 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include -#include - -#define CHECK(cmd) \ -{\ - cudaError_t error = cmd;\ - if (error != cudaSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} - - -/* - * Square each element in the array A and write to array C. - */ -template -__global__ void -vector_square(T *C_d, const T *A_d, size_t N) -{ - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i=offset; i>> (C_d, A_d, N); - - printf ("info: copy Device2Host\n"); - // CHECK: CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - CHECK ( cudaMemcpy(C_h, C_d, Nbytes, cudaMemcpyDeviceToHost)); - - printf ("info: check result\n"); - for (size_t i=0; i -// CHECK: #include -#include - -__global__ void staticReverse(int *d, int n) -{ - // CHECK-NOT: HIP_DYNAMIC_SHARED(int, s); - // CHECK: __shared__ int s[64]; - __shared__ int s[64]; - int t = threadIdx.x; - int tr = n-t-1; - s[t] = d[t]; - // Will not conttinue until all threads completed. 
- __syncthreads(); - d[t] = s[tr]; -} - -int main(void) -{ - const int n = 64; - int a[n], r[n], d[n]; - - for (int i = 0; i < n; i++) { - a[i] = i; - r[i] = n-i-1; - d[i] = 0; - } - - int *d_d; - // CHECK: hipMalloc(&d_d, n * sizeof(int)); - cudaMalloc(&d_d, n * sizeof(int)); - // run version with static shared memory - // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); - cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(staticReverse, dim3(1), dim3(n), 0, 0, d_d, n); - staticReverse<<<1,n>>>(d_d, n); - // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); - cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); - for (int i = 0; i < n; i++) - if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); -} diff --git a/tests/hipify-clang/unit_tests/samples/vec_add.cu b/tests/hipify-clang/unit_tests/samples/vec_add.cu deleted file mode 100644 index a6d8950e0b..0000000000 --- a/tests/hipify-clang/unit_tests/samples/vec_add.cu +++ /dev/null @@ -1,89 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Kernel definition -__global__ void vecAdd(float* A, float* B, float* C) -{ - int i = threadIdx.x; - A[i] = 0; - B[i] = i; - C[i] = A[i] + B[i]; -} -// CHECK: #include -#include -#define SIZE 10 -#define KERNELINVOKES 5000000 -int vecadd(int gpudevice, int rank) -{ - int devcheck(int, int); - devcheck(gpudevice, rank); - float A[SIZE], B[SIZE], C[SIZE]; - // Kernel invocation - float *devPtrA; - float *devPtrB; - float *devPtrC; - int memsize = SIZE * sizeof(float); - // CHECK: hipMalloc((void**)&devPtrA, memsize); - // CHECK: hipMalloc((void**)&devPtrB, memsize); - // CHECK: hipMalloc((void**)&devPtrC, memsize); - cudaMalloc((void**)&devPtrA, memsize); - cudaMalloc((void**)&devPtrB, memsize); - cudaMalloc((void**)&devPtrC, memsize); - // CHECK: hipMemcpy(devPtrA, A, memsize, hipMemcpyHostToDevice); - // CHECK: hipMemcpy(devPtrB, B, memsize, 
hipMemcpyHostToDevice); - cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice); - cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice); - for (int i = 0; i>>(devPtrA, devPtrB, devPtrC); - } - // CHECK: hipMemcpy(C, devPtrC, memsize, hipMemcpyDeviceToHost); - cudaMemcpy(C, devPtrC, memsize, cudaMemcpyDeviceToHost); - // calculate only up to gpudevice to show the unique output - // of each rank's kernel launch - for (int i = 0; i= device_count) - { - printf("gpudevice >= device_count ... exiting\n"); - exit(1); - } - // CHECK: hipError_t cudareturn; - // CHECK: hipDeviceProp_t deviceProp; - // CHECK: hipGetDeviceProperties(&deviceProp, gpudevice); - cudaError_t cudareturn; - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, gpudevice); - if (deviceProp.warpSize <= 1) - { - printf("rank %d: warning, CUDA Device Emulation (CPU) detected, exiting\n", rank); - exit(1); - } - // CHECK: cudareturn = hipSetDevice(gpudevice); - cudareturn = cudaSetDevice(gpudevice); - // CHECK: if (cudareturn == hipErrorInvalidDevice) - if (cudareturn == cudaErrorInvalidDevice) - { - // CHECK: perror("hipSetDevice returned hipErrorInvalidDevice"); - perror("cudaSetDevice returned cudaErrorInvalidDevice"); - } - else - { - // CHECK: hipGetDevice(&device); - cudaGetDevice(&device); - printf("rank %d: cudaGetDevice()=%d\n", rank, device); - } -} From a48b312aa940f891c0f39e71d490313796d7fedb Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Tue, 7 Apr 2020 12:49:26 -0400 Subject: [PATCH 071/132] [hip] Fix volatile-qualified member function declartion. - It should be a volatile-qualified member function instead of returning volatile type. 
Change-Id: Id7aaa1953d56151b59e469ef22b9f4280f63bebb --- include/hip/hcc_detail/hip_fp16.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 52abc1a004..3fa18dae2e 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -229,7 +229,7 @@ THE SOFTWARE. __host__ __device__ operator __half_raw() const { return __half_raw{data}; } __host__ __device__ - operator volatile __half_raw() const volatile + operator __half_raw() const volatile { return __half_raw{data}; } From 1d464f391ea53f07087f1f62c1e53bfc0ba32350 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Thu, 9 Apr 2020 10:20:11 +0530 Subject: [PATCH 072/132] printf test: loop with divergent exit condition Change-Id: I1071e4a240a280332bde669701c72228b9dea2df --- tests/src/printf/hipPrintfBasic.cpp | 37 +++++++++++++++++++++++++++++ tests/src/test_common.h | 4 ++++ 2 files changed, 41 insertions(+) diff --git a/tests/src/printf/hipPrintfBasic.cpp b/tests/src/printf/hipPrintfBasic.cpp index e51373c251..7d0c57cdc3 100644 --- a/tests/src/printf/hipPrintfBasic.cpp +++ b/tests/src/printf/hipPrintfBasic.cpp @@ -219,6 +219,42 @@ static void test_series(int *retval, uint num_blocks, uint threads_per_block) { HIPASSERT(linecount[msg_short] == num_threads); } +__global__ void kernel_divergent_loop() { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + for (int i = 0; i <= tid; ++i) { + printf("%d\n", i); + } +} + +static void test_divergent_loop(uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + + hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + std::map count; + while (true) { + int i; + CapturedData >> i; + if (CapturedData.fail()) 
+ break; + count[i]++; + } + + HIPASSERT(count.size() == num_threads); + for (int i = 0; i != num_threads; ++i) { + HIPASSERT(count[i] == num_threads - i); + } +} + int main() { uint num_blocks = 1; uint threads_per_block = 64; @@ -233,6 +269,7 @@ int main() { test_divergent0(retval, num_blocks, threads_per_block); test_divergent1(retval, num_blocks, threads_per_block); test_series(retval, num_blocks, threads_per_block); + test_divergent_loop(num_blocks, threads_per_block); passed(); } diff --git a/tests/src/test_common.h b/tests/src/test_common.h index 7d8c39e74c..ce65dfaf33 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -55,11 +55,15 @@ THE SOFTWARE. printf("%sPASSED!%s\n", KGRN, KNRM); \ exit(0); +// The real "assert" would have written to stderr. But it is +// sufficient to just fflush here without getting pedantic. This also +// ensures that we don't lose any earlier writes to stdout. #define failed(...) \ printf("%serror: ", KRED); \ printf(__VA_ARGS__); \ printf("\n"); \ printf("error: TEST FAILED\n%s", KNRM); \ + fflush(nullptr); \ abort(); #define warn(...) 
\ From 2af31479e2a758aa17638606019898946902f672 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 7 Apr 2020 06:57:42 -0400 Subject: [PATCH 073/132] Merge branch 'amd-master' into amd-master-next Change-Id: I3094c15008093f2072bcd38aca4ea90aeae2d97b --- CMakeLists.txt | 26 +- README.md | 3 +- bin/hipcc | 58 ++- bin/hipify-perl | 28 +- cmake/FindHIP.cmake | 1 - .../markdown/CUSPARSE_API_supported_by_HIP.md | 48 +- docs/markdown/hip_debugging.md | 14 +- hip_prof_gen.py | 47 +- hipify-clang/README.md | 243 ++++----- .../src/CUDA2HIP_Driver_API_functions.cpp | 4 +- .../src/CUDA2HIP_SPARSE_API_functions.cpp | 38 +- .../src/CUDA2HIP_SPARSE_API_types.cpp | 10 +- include/hip/hcc_detail/device_functions.h | 8 +- include/hip/hcc_detail/device_library_decls.h | 3 +- .../hip/hcc_detail/functional_grid_launch.hpp | 31 +- include/hip/hcc_detail/hip_atomic.h | 8 + include/hip/hcc_detail/hip_fp16.h | 14 + include/hip/hcc_detail/hip_fp16_math_fwd.h | 2 + include/hip/hcc_detail/hip_runtime.h | 11 +- include/hip/hcc_detail/hip_runtime_api.h | 2 +- include/hip/hcc_detail/hip_vector_types.h | 97 +++- include/hip/hcc_detail/hiprtc.h | 4 + include/hip/hcc_detail/host_defines.h | 2 +- include/hip/hip_runtime_api.h | 1 - include/hip/nvcc_detail/hip_runtime_api.h | 43 +- lpl_ca/CMakeLists.txt | 2 +- lpl_ca/ca.hpp | 2 +- packaging/hip-base.txt | 7 +- packaging/hip-doc.txt | 3 +- packaging/hip-hcc.txt | 3 +- packaging/hip-nvcc.txt | 3 +- packaging/hip-samples.txt | 5 +- packaging/hip-targets.cmake | 2 - packaging/hip-vdi.txt | 12 +- samples/0_Intro/module_api/defaultDriver.cpp | 4 +- .../0_Intro/module_api/launchKernelHcc.cpp | 4 +- samples/0_Intro/module_api/runKernel.cpp | 4 +- .../0_Intro/module_api_global/runKernel.cpp | 4 +- samples/1_Utils/hipInfo/hipInfo.cpp | 3 +- src/code_object_bundle.cpp | 34 -- .../code_object_bundle.inl | 30 +- src/hip_clang.cpp | 4 +- src/hip_device.cpp | 12 + src/hip_hcc.cpp | 39 +- src/hip_hcc_internal.h | 14 - src/hip_memory.cpp | 101 ++-- 
src/hip_module.cpp | 475 ++++++++++++------ src/hip_stream.cpp | 38 +- src/hip_texture.cpp | 26 +- src/hiprtc.cpp | 2 +- src/program_state.inl | 9 +- .../libraries/cuSPARSE/cuSPARSE_12.cu | 3 +- .../Negative/memory/hipMemcpyFromSymbol.cpp | 46 ++ .../memory/hipMemcpyFromSymbolAsync.cpp | 49 ++ .../src/Negative/memory/hipMemcpyToSymbol.cpp | 46 ++ .../memory/hipMemcpyToSymbolAsync.cpp | 49 ++ tests/src/Negative/memory/hipMemory.cpp | 43 ++ .../stream/hipStreamCreateWithFlags.cpp | 40 ++ tests/src/deviceLib/hipTestHalf.cpp | 39 ++ tests/src/deviceLib/hip_floatnTM.cpp | 239 +++++++++ tests/src/hiprtc/hiprtcGetLoweredName.cpp | 2 +- tests/src/hiprtc/saxpy.cpp | 2 +- .../memory/hipMemcpyNegetiveTests.cpp | 53 ++ .../module/hipModuleLoadDataMultThreaded.cpp | 4 +- .../runtimeApi/stream/StreamAddCallback.cpp | 145 ++++++ .../stream/hipStreamAddCallbackCatch.cpp | 409 +++++++++++++++ tests/src/texture/hipTex1DFetchCheckModes.cpp | 122 +++++ 67 files changed, 2278 insertions(+), 601 deletions(-) delete mode 100644 src/code_object_bundle.cpp rename include/hip/hcc_detail/code_object_bundle.hpp => src/code_object_bundle.inl (86%) create mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbol.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyToSymbol.cpp create mode 100644 tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp create mode 100644 tests/src/Negative/memory/hipMemory.cpp create mode 100644 tests/src/Negative/stream/hipStreamCreateWithFlags.cpp create mode 100644 tests/src/deviceLib/hip_floatnTM.cpp create mode 100644 tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp create mode 100644 tests/src/runtimeApi/stream/StreamAddCallback.cpp create mode 100644 tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp create mode 100644 tests/src/texture/hipTex1DFetchCheckModes.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d1a2b133c5..c67ed29203 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -207,19 +207,6 @@ if (NOT CPACK_SET_DESTDIR) set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Default installation path of hcc installer package") endif (NOT CPACK_SET_DESTDIR) -# Check if we need to enable ATP marker -if(NOT DEFINED COMPILE_HIP_ATP_MARKER) - if(NOT DEFINED ENV{COMPILE_HIP_ATP_MARKER}) - set(COMPILE_HIP_ATP_MARKER 0) - else() - set(COMPILE_HIP_ATP_MARKER $ENV{COMPILE_HIP_ATP_MARKER}) - message(WARNING "HIP Markers are deprecated, please use roctracer/rocTX marker APIs.") - endif() -else() - message(WARNING "HIP Markers are deprecated, please use roctracer/rocTX marker APIs.") -endif() -add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) - ############################# # Profiling API support ############################# @@ -309,10 +296,6 @@ message(STATUS "\nHSA runtime in: " ${HSA_PATH}) if(HIP_PLATFORM STREQUAL "hcc") include_directories(${PROJECT_SOURCE_DIR}/include) set(HIP_HCC_BUILD_FLAGS) - if(COMPILE_HIP_ATP_MARKER) - include_directories(/opt/rocm/profiler/CXLActivityLogger/include) - set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DCOMPILE_HIP_ATP_MARKER=1") - endif() # Add HIP_VERSION to CMAKE__FLAGS set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSION_MAJOR} -DHIP_VERSION_MINOR=${HIP_VERSION_MINOR} -DHIP_VERSION_PATCH=${HIP_VERSION_GITDATE}") @@ -328,7 +311,6 @@ if(HIP_PLATFORM STREQUAL "hcc") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}") set(SOURCE_FILES_RUNTIME - src/code_object_bundle.cpp src/program_state.cpp src/hip_clang.cpp src/hip_hcc.cpp @@ -363,9 +345,6 @@ if(HIP_PLATFORM STREQUAL "hcc") set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE ) set (CMAKE_SKIP_BUILD_RPATH TRUE ) endif () - if(COMPILE_HIP_ATP_MARKER) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger") - endif() add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME}) add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME}) 
@@ -376,7 +355,7 @@ if(HIP_PLATFORM STREQUAL "hcc") target_link_libraries(hip_hcc PRIVATE hc_am) target_link_libraries(hip_hcc_static PRIVATE hc_am) - add_library(hiprtc SHARED src/hiprtc.cpp src/code_object_bundle.cpp) + add_library(hiprtc SHARED src/hiprtc.cpp) target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) @@ -387,6 +366,9 @@ if(HIP_PLATFORM STREQUAL "hcc") set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden) + set_target_properties(hiprtc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + if(HIP_PLATFORM STREQUAL "hcc") find_package(amd_comgr REQUIRED CONFIG diff --git a/README.md b/README.md index 2bffd12162..c2e2a7a456 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ## What is this repository for? ### -HIP allows developers to convert CUDA code to portable C++. The same source code can be compiled to run on NVIDIA or AMD GPUs. +**HIP is a C++ Runtime API and Kernel Language that allows developers to create portable applications for AMD and NVIDIA GPUs from single source code.** + Key features include: * HIP is very thin and has little or no performance impact over coding directly in CUDA or hcc "HC" mode. 
diff --git a/bin/hipcc b/bin/hipcc index b3db312c78..5ed781bc60 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -134,6 +134,7 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { $HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH } $HIPCXXFLAGS .= "-D__HIP_VDI__"; + $HIPCFLAGS .= "-D__HIP_VDI__"; } if (defined $HIP_VDI_HOME) { @@ -207,7 +208,8 @@ if ($HIP_PLATFORM eq "clang") { } else { $HIPCXXFLAGS .= " -std=c++11"; } - $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH"; + $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; + $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; if (not $isWindows) { $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; @@ -222,8 +224,10 @@ if ($HIP_PLATFORM eq "clang") { $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; if (!($HIP_RUNTIME eq "HCC")) { $HIPCXXFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; + $HIPCFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; } } elsif ($HIP_PLATFORM eq "hcc") { @@ -282,8 +286,11 @@ if ($HIP_PLATFORM eq "clang") { } $HIPCXXFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda"; + $HIPCFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; $HIPCXXFLAGS .= " -Wno-deprecated-register"; + $HIPCFLAGS .= " -Wno-deprecated-register"; $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; @@ -321,6 +328,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCC="$CUDA_PATH/bin/nvcc"; $HIPCXXFLAGS .= " -Wno-deprecated-gpu-targets "; $HIPCXXFLAGS .= " -isystem $CUDA_PATH/include"; + $HIPCFLAGS .= " -isystem $CUDA_PATH/include"; $HIPLDFLAGS = " -Wno-deprecated-gpu-targets -lcuda -lcudart -L$CUDA_PATH/lib64"; } else { @@ -330,11 +338,14 @@ if 
($HIP_PLATFORM eq "clang") { # Add paths to common HIP includes: $HIPCXXFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; +$HIPCFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; my $compileOnly = 0; my $needCXXFLAGS = 0; # need to add CXX flags to compile step +my $needCFLAGS = 0; # need to add C flags to compile step my $needLDFLAGS = 1; # need to add LDFLAGS to compile step. -my $hasC = 0; # options contain a c-style file (NVCC must force recognition as GPU file) +my $hasC = 0; # options contain a c-style file +my $hasCXX = 0; # options contain a cpp-style file (NVCC must force recognition as GPU file) my $hasCU = 0; # options contain a cu-style file (HCC must force recognition as GPU file) my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. (some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version @@ -343,6 +354,7 @@ my $buildDeps = 0; my $linkType = 1; my $setLinkType = 0; my $coFormatv3 = 1; +my $funcSupp = 0; # enable function support my @options = (); my @inputs = (); @@ -400,7 +412,6 @@ foreach $arg (@ARGV) my $swallowArg = 0; if ($arg eq '-c' or $arg eq '--genco') { $compileOnly = 1; - $needCXXFLAGS = 1; $needLDFLAGS = 0; } @@ -463,6 +474,7 @@ foreach $arg (@ARGV) } if($trimarg eq '-use_fast_math') { $HIPCXXFLAGS .= " -DHIP_FAST_MATH "; + $HIPCFLAGS .= " -DHIP_FAST_MATH "; } if(($trimarg eq '-use-staticlib') and ($setLinkType eq 0)) { @@ -599,20 +611,30 @@ foreach $arg (@ARGV) #if $arg eq "--hipcc_profile") { # Example argument here, hipcc # #} + if ($arg eq "--hipcc-func-supp") { + $funcSupp = 1; + } elsif ($arg eq "--hipcc-no-func-supp") { + $funcSupp = 0; + } } else { push (@options, $arg); } #print "O: <$arg>\n"; } else { # input files and libraries - if (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.c$/) or ($arg =~ /\.cc$/) ) { + if ($arg =~ /\.c$/) { $hasC = 1; + $needCFLAGS = 1; + $toolArgs .= " -x c" + } + elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { + 
$hasCXX = 1; $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang') { + if ($HIP_PLATFORM eq 'clang' and not $arg =~ /\.c$/) { $toolArgs .= " -x hip" } } - if (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { + elsif (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { $hasCU = 1; $needCXXFLAGS = 1; if ($HIP_PLATFORM eq 'clang') { @@ -657,7 +679,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ my $archMacro = ' -D__HIP_ARCH_' . uc($val) . '__=1 '; # Add the arch option and macro to the compiler options. $GPU_ARCH_ARG = $GPU_ARCH_OPT . $val; - $HIPLDFLAGS .= $GPU_ARCH_ARG; + $HIPLDARCHFLAGS .= $GPU_ARCH_ARG; $HIPCXXFLAGS .= $archMacro; if ($HIP_PLATFORM eq 'clang') { $HIPCXXFLAGS .= $GPU_ARCH_ARG; @@ -685,7 +707,7 @@ if ($coFormatv3 and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= " -mcode-object-v3"; } -if ($hasC and $HIP_PLATFORM eq 'nvcc') { +if ($hasCXX and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -x cu"; } if ($hasCU and $HIP_PLATFORM eq 'hcc') { @@ -694,6 +716,7 @@ if ($hasCU and $HIP_PLATFORM eq 'hcc') { if ($buildDeps and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -M -D__CUDACC__"; + $HIPCFLAGS .= " -M -D__CUDACC__"; } if ($buildDeps and $HIP_PLATFORM eq 'clang') { @@ -701,10 +724,14 @@ if ($buildDeps and $HIP_PLATFORM eq 'clang') { } # Add --hip-link only if there are no source files. -if (!$needCXXFLAGS and $HIP_PLATFORM eq 'clang') { +if (!$needCXXFLAGS and !$needCFLAGS and $HIP_PLATFORM eq 'clang') { $HIPLDFLAGS .= " --hip-link"; } +if (!$needCFLAGS and $HIP_PLATFORM eq 'clang') { + $HIPLDFLAGS .= $HIPLDARCHFLAGS; +} + if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= $HCC_WA_FLAGS; @@ -727,11 +754,12 @@ if ($HIP_PLATFORM eq "clang") { # Set default optimization level to -O3 for hip-clang. if ($optArg eq "") { $HIPCXXFLAGS .= " -O3"; + $HIPCFLAGS .= " -O3"; $HIPLDFLAGS .= " -O3"; } # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. 
# ToDo : remove restriction for Windows after clang bug is fixed. - if ($optArg ne "-O0" and not $isWindows) { + if (!$funcSupp and $optArg ne "-O0" and not $isWindows) { $HIPCXXFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; if ($needLDFLAGS and not $needCXXFLAGS) { $HIPLDFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; @@ -747,18 +775,22 @@ if ($HIP_PLATFORM eq "clang") { if ($HIPCC_COMPILE_FLAGS_APPEND) { $HIPCXXFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; + $HIPCFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; } if ($HIPCC_LINK_FLAGS_APPEND) { $HIPLDFLAGS .= " $HIPCC_LINK_FLAGS_APPEND"; } my $CMD="$HIPCC"; -if ($needCXXFLAGS) { - $CMD .= " $HIPCXXFLAGS"; -} if ($needLDFLAGS and not $compileOnly) { $CMD .= " $HIPLDFLAGS"; } +if ($needCFLAGS) { + $CMD .= " $HIPCFLAGS"; +} +if ($needCXXFLAGS) { + $CMD .= " $HIPCXXFLAGS"; +} $CMD .= " $toolArgs"; if ($verbose & 0x1) { diff --git a/bin/hipify-perl b/bin/hipify-perl index 62cd8436f8..4783214836 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -341,8 +341,8 @@ sub simpleSubstitutions { $ft{'execution'} += s/\bcudaLaunchCooperativeKernelMultiDevice\b/hipLaunchCooperativeKernelMultiDevice/g; $ft{'execution'} += s/\bcudaLaunchKernel\b/hipLaunchKernel/g; $ft{'execution'} += s/\bcudaSetupArgument\b/hipSetupArgument/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessor/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; $ft{'occupancy'} += s/\bcuOccupancyMaxPotentialBlockSize\b/hipOccupancyMaxPotentialBlockSize/g; $ft{'occupancy'} += 
s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; $ft{'occupancy'} += s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; @@ -754,6 +754,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCcsrmm\b/hipsparseCcsrmm/g; $ft{'library'} += s/\bcusparseCcsrmm2\b/hipsparseCcsrmm2/g; $ft{'library'} += s/\bcusparseCcsrmv\b/hipsparseCcsrmv/g; + $ft{'library'} += s/\bcusparseCcsrsm2_analysis\b/hipsparseCcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseCcsrsm2_bufferSizeExt\b/hipsparseCcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseCcsrsm_solve\b/hipsparseCcsrsm_solve/g; $ft{'library'} += s/\bcusparseCcsrsv2_analysis\b/hipsparseCcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSize\b/hipsparseCcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSizeExt\b/hipsparseCcsrsv2_bufferSizeExt/g; @@ -763,9 +766,11 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCgthr\b/hipsparseCgthr/g; $ft{'library'} += s/\bcusparseCgthrz\b/hipsparseCgthrz/g; $ft{'library'} += s/\bcusparseChybmv\b/hipsparseChybmv/g; + $ft{'library'} += s/\bcusparseCnnz\b/hipsparseCnnz/g; $ft{'library'} += s/\bcusparseCreate\b/hipsparseCreate/g; $ft{'library'} += s/\bcusparseCreateCsrgemm2Info\b/hipsparseCreateCsrgemm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrilu02Info\b/hipsparseCreateCsrilu02Info/g; + $ft{'library'} += s/\bcusparseCreateCsrsm2Info\b/hipsparseCreateCsrsm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrsv2Info\b/hipsparseCreateCsrsv2Info/g; $ft{'library'} += s/\bcusparseCreateHybMat\b/hipsparseCreateHybMat/g; $ft{'library'} += s/\bcusparseCreateIdentityPermutation\b/hipsparseCreateIdentityPermutation/g; @@ -784,6 +789,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDcsrmm\b/hipsparseDcsrmm/g; $ft{'library'} += s/\bcusparseDcsrmm2\b/hipsparseDcsrmm2/g; $ft{'library'} += s/\bcusparseDcsrmv\b/hipsparseDcsrmv/g; + 
$ft{'library'} += s/\bcusparseDcsrsm2_analysis\b/hipsparseDcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseDcsrsm2_bufferSizeExt\b/hipsparseDcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseDcsrsm_solve\b/hipsparseDcsrsm_solve/g; $ft{'library'} += s/\bcusparseDcsrsv2_analysis\b/hipsparseDcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSize\b/hipsparseDcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSizeExt\b/hipsparseDcsrsv2_bufferSizeExt/g; @@ -792,12 +800,14 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDestroy\b/hipsparseDestroy/g; $ft{'library'} += s/\bcusparseDestroyCsrgemm2Info\b/hipsparseDestroyCsrgemm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrilu02Info\b/hipsparseDestroyCsrilu02Info/g; + $ft{'library'} += s/\bcusparseDestroyCsrsm2Info\b/hipsparseDestroyCsrsm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrsv2Info\b/hipsparseDestroyCsrsv2Info/g; $ft{'library'} += s/\bcusparseDestroyHybMat\b/hipsparseDestroyHybMat/g; $ft{'library'} += s/\bcusparseDestroyMatDescr\b/hipsparseDestroyMatDescr/g; $ft{'library'} += s/\bcusparseDgthr\b/hipsparseDgthr/g; $ft{'library'} += s/\bcusparseDgthrz\b/hipsparseDgthrz/g; $ft{'library'} += s/\bcusparseDhybmv\b/hipsparseDhybmv/g; + $ft{'library'} += s/\bcusparseDnnz\b/hipsparseDnnz/g; $ft{'library'} += s/\bcusparseDroti\b/hipsparseDroti/g; $ft{'library'} += s/\bcusparseDsctr\b/hipsparseDsctr/g; $ft{'library'} += s/\bcusparseGetMatDiagType\b/hipsparseGetMatDiagType/g; @@ -820,6 +830,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseScsrmm\b/hipsparseScsrmm/g; $ft{'library'} += s/\bcusparseScsrmm2\b/hipsparseScsrmm2/g; $ft{'library'} += s/\bcusparseScsrmv\b/hipsparseScsrmv/g; + $ft{'library'} += s/\bcusparseScsrsm2_analysis\b/hipsparseScsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseScsrsm2_bufferSizeExt\b/hipsparseScsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseScsrsm_solve\b/hipsparseScsrsm_solve/g; $ft{'library'} += 
s/\bcusparseScsrsv2_analysis\b/hipsparseScsrsv2_analysis/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSize\b/hipsparseScsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSizeExt\b/hipsparseScsrsv2_bufferSizeExt/g; @@ -834,6 +847,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseSgthr\b/hipsparseSgthr/g; $ft{'library'} += s/\bcusparseSgthrz\b/hipsparseSgthrz/g; $ft{'library'} += s/\bcusparseShybmv\b/hipsparseShybmv/g; + $ft{'library'} += s/\bcusparseSnnz\b/hipsparseSnnz/g; $ft{'library'} += s/\bcusparseSroti\b/hipsparseSroti/g; $ft{'library'} += s/\bcusparseSsctr\b/hipsparseSsctr/g; $ft{'library'} += s/\bcusparseXbsrilu02_zeroPivot\b/hipsparseXbsrilu02_zeroPivot/g; @@ -847,6 +861,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseXcsrgemm2Nnz\b/hipsparseXcsrgemm2Nnz/g; $ft{'library'} += s/\bcusparseXcsrgemmNnz\b/hipsparseXcsrgemmNnz/g; $ft{'library'} += s/\bcusparseXcsrilu02_zeroPivot\b/hipsparseXcsrilu02_zeroPivot/g; + $ft{'library'} += s/\bcusparseXcsrsm2_zeroPivot\b/hipsparseXcsrsm2_zeroPivot/g; $ft{'library'} += s/\bcusparseXcsrsort\b/hipsparseXcsrsort/g; $ft{'library'} += s/\bcusparseXcsrsort_bufferSizeExt\b/hipsparseXcsrsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsrsv2_zeroPivot\b/hipsparseXcsrsv2_zeroPivot/g; @@ -863,6 +878,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZcsrmm\b/hipsparseZcsrmm/g; $ft{'library'} += s/\bcusparseZcsrmm2\b/hipsparseZcsrmm2/g; $ft{'library'} += s/\bcusparseZcsrmv\b/hipsparseZcsrmv/g; + $ft{'library'} += s/\bcusparseZcsrsm2_analysis\b/hipsparseZcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseZcsrsm2_bufferSizeExt\b/hipsparseZcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseZcsrsm_solve\b/hipsparseZcsrsm_solve/g; $ft{'library'} += s/\bcusparseZcsrsv2_analysis\b/hipsparseZcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSize\b/hipsparseZcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSizeExt\b/hipsparseZcsrsv2_bufferSizeExt/g; @@ 
-872,6 +890,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZgthr\b/hipsparseZgthr/g; $ft{'library'} += s/\bcusparseZgthrz\b/hipsparseZgthrz/g; $ft{'library'} += s/\bcusparseZhybmv\b/hipsparseZhybmv/g; + $ft{'library'} += s/\bcusparseZnnz\b/hipsparseZnnz/g; $ft{'library'} += s/\bcusparseZsctr\b/hipsparseZsctr/g; $ft{'device_library'} += s/\bcurand\b/hiprand/g; $ft{'device_library'} += s/\bcurand_discrete\b/hiprand_discrete/g; @@ -997,6 +1016,8 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcsrgemm2Info\b/csrgemm2Info/g; $ft{'type'} += s/\bcsrgemm2Info_t\b/csrgemm2Info_t/g; $ft{'type'} += s/\bcsrilu02Info_t\b/csrilu02Info_t/g; + $ft{'type'} += s/\bcsrsm2Info\b/csrsm2Info/g; + $ft{'type'} += s/\bcsrsm2Info_t\b/csrsm2Info_t/g; $ft{'type'} += s/\bcsrsv2Info_t\b/csrsv2Info_t/g; $ft{'type'} += s/\bcuComplex\b/hipComplex/g; $ft{'type'} += s/\bcuDoubleComplex\b/hipDoubleComplex/g; @@ -1130,6 +1151,7 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcurandStatus_t\b/hiprandStatus_t/g; $ft{'type'} += s/\bcusparseAction_t\b/hipsparseAction_t/g; $ft{'type'} += s/\bcusparseDiagType_t\b/hipsparseDiagType_t/g; + $ft{'type'} += s/\bcusparseDirection_t\b/hipsparseDirection_t/g; $ft{'type'} += s/\bcusparseFillMode_t\b/hipsparseFillMode_t/g; $ft{'type'} += s/\bcusparseHandle_t\b/hipsparseHandle_t/g; $ft{'type'} += s/\bcusparseHybMat_t\b/hipsparseHybMat_t/g; @@ -1398,6 +1420,8 @@ sub simpleSubstitutions { $ft{'numeric_literal'} += s/\bCUSPARSE_ACTION_SYMBOLIC\b/HIPSPARSE_ACTION_SYMBOLIC/g; $ft{'numeric_literal'} += s/\bCUSPARSE_DIAG_TYPE_NON_UNIT\b/HIPSPARSE_DIAG_TYPE_NON_UNIT/g; $ft{'numeric_literal'} += s/\bCUSPARSE_DIAG_TYPE_UNIT\b/HIPSPARSE_DIAG_TYPE_UNIT/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_COLUMN\b/HIPSPARSE_DIRECTION_COLUMN/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_ROW\b/HIPSPARSE_DIRECTION_ROW/g; $ft{'numeric_literal'} += s/\bCUSPARSE_FILL_MODE_LOWER\b/HIPSPARSE_FILL_MODE_LOWER/g; $ft{'numeric_literal'} += 
s/\bCUSPARSE_FILL_MODE_UPPER\b/HIPSPARSE_FILL_MODE_UPPER/g; $ft{'numeric_literal'} += s/\bCUSPARSE_HYB_PARTITION_AUTO\b/HIPSPARSE_HYB_PARTITION_AUTO/g; diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 7edf27f3c7..0819a0364c 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -75,7 +75,6 @@ if(UNIX AND NOT APPLE AND NOT CYGWIN) endif() # And push it back to the cache set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) - message("Found HIP at ${HIP_ROOT_DIR}") endif() # Find HIPCC executable diff --git a/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/docs/markdown/CUSPARSE_API_supported_by_HIP.md index fc7a8ee8cd..d23b06d307 100644 --- a/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -12,9 +12,9 @@ | enum |***`cusparseAction_t`*** | |***`hipsparseAction_t`*** | | 0 |*`CUSPARSE_ACTION_SYMBOLIC`* | |*`HIPSPARSE_ACTION_SYMBOLIC`* | | 1 |*`CUSPARSE_ACTION_NUMERIC`* | |*`HIPSPARSE_ACTION_NUMERIC`* | -| enum |***`cusparseDirection_t`*** | | | -| 0 |*`CUSPARSE_DIRECTION_ROW`* | | | -| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | | | +| enum |***`cusparseDirection_t`*** | |***`hipsparseDirection_t`*** | +| 0 |*`CUSPARSE_DIRECTION_ROW`* | |*`HIPSPARSE_DIRECTION_ROW`* | +| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | |*`HIPSPARSE_DIRECTION_COLUMN`* | | enum |***`cusparseHybPartition_t`*** | |***`hipsparseHybPartition_t`*** | | 0 |*`CUSPARSE_HYB_PARTITION_AUTO`* | |*`HIPSPARSE_HYB_PARTITION_AUTO`* | | 1 |*`CUSPARSE_HYB_PARTITION_USER`* | |*`HIPSPARSE_HYB_PARTITION_USER`* | @@ -69,8 +69,8 @@ | typedef |`cusparseSolveAnalysisInfo_t` | | | | struct |`csrsv2Info` | | | | typedef |`csrsv2Info_t` | |`csrsv2Info_t` | -| struct |`csrsm2Info` | 9.2 | | -| typedef |`csrsm2Info_t` | | | +| struct |`csrsm2Info` | 9.2 |`csrsm2Info` | +| typedef |`csrsm2Info_t` | |`csrsm2Info_t` | | struct |`bsrsv2Info` | | | | typedef |`bsrsv2Info_t` | | | | struct |`bsrsm2Info` | | | @@ -151,8 +151,8 @@ |`cusparseGetStream` 
|`hipsparseGetStream` | 8.0 | |`cusparseCreateCsrsv2Info` |`hipsparseCreateCsrsv2Info` | |`cusparseDestroyCsrsv2Info` |`hipsparseDestroyCsrsv2Info` | -|`cusparseCreateCsrsm2Info` | | 9.2 | -|`cusparseDestroyCsrsm2Info` | | 9.2 | +|`cusparseCreateCsrsm2Info` |`hipsparseCreateCsrsm2Info` | 9.2 | +|`cusparseDestroyCsrsm2Info` |`hipsparseDestroyCsrsm2Info` | 9.2 | |`cusparseCreateCsric02Info` | | |`cusparseDestroyCsric02Info` | | |`cusparseCreateCsrilu02Info` |`hipsparseCreateCsrilu02Info` | @@ -306,19 +306,19 @@ |`cusparseDcsrsm_solve` | | |`cusparseCcsrsm_solve` | | |`cusparseZcsrsm_solve` | | -|`cusparseScsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseDcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseCcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseZcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseScsrsm2_analysis` | | 9.2 | -|`cusparseDcsrsm2_analysis` | | 9.2 | -|`cusparseCcsrsm2_analysis` | | 9.2 | -|`cusparseZcsrsm2_analysis` | | 9.2 | -|`cusparseScsrsm2_solve` | | 9.2 | -|`cusparseDcsrsm2_solve` | | 9.2 | -|`cusparseCcsrsm2_solve` | | 9.2 | -|`cusparseZcsrsm2_solve` | | 9.2 | -|`cusparseXcsrsm2_zeroPivot` | | 9.2 | +|`cusparseScsrsm2_bufferSizeExt` |`hipsparseScsrsm2_bufferSizeExt` | 9.2 | +|`cusparseDcsrsm2_bufferSizeExt` |`hipsparseDcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseCcsrsm2_bufferSizeExt` |`hipsparseCcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseZcsrsm2_bufferSizeExt` |`hipsparseZcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseScsrsm2_analysis` |`hipsparseScsrsm2_analysis` | 9.2 | +|`cusparseDcsrsm2_analysis` |`hipsparseDcsrsm2_analysis` | 9.2 | +|`cusparseCcsrsm2_analysis` |`hipsparseCcsrsm2_analysis` | 9.2 | +|`cusparseZcsrsm2_analysis` |`hipsparseZcsrsm2_analysis` | 9.2 | +|`cusparseScsrsm2_solve` |`hipsparseScsrsm2_solve` | 9.2 | +|`cusparseDcsrsm2_solve` |`hipsparseDcsrsm2_solve` | 9.2 | +|`cusparseCcsrsm2_solve` |`hipsparseCcsrsm2_solve` | 9.2 | +|`cusparseZcsrsm2_solve` |`hipsparseZcsrsm2_solve` | 9.2 | +|`cusparseXcsrsm2_zeroPivot` |`hipsparseXcsrsm2_zeroPivot` | 9.2 | 
|`cusparseSbsrmm` | | |`cusparseDbsrmm` | | |`cusparseCbsrmm` | | @@ -662,10 +662,10 @@ |`cusparseDhyb2dense` | | |`cusparseChyb2dense` | | |`cusparseZhyb2dense` | | -|`cusparseSnnz` | | -|`cusparseDnnz` | | -|`cusparseCnnz` | | -|`cusparseZnnz` | | +|`cusparseSnnz` |`hipsparseSnnz` | +|`cusparseDnnz` |`hipsparseDnnz` | +|`cusparseCnnz` |`hipsparseCnnz` | +|`cusparseZnnz` |`hipsparseZnnz` | |`cusparseCreateIdentityPermutation` |`hipsparseCreateIdentityPermutation` | |`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` | |`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` | diff --git a/docs/markdown/hip_debugging.md b/docs/markdown/hip_debugging.md index bf877d894e..fde17d410e 100644 --- a/docs/markdown/hip_debugging.md +++ b/docs/markdown/hip_debugging.md @@ -1,13 +1,13 @@ Table of Contents ================= - * [Profiling HIP Code](#profiling-hip-code" aria-hidden="true">\n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // ENABLE_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); diff --git a/hipify-clang/README.md b/hipify-clang/README.md index 88d7a72ccd..8cdeceace0 100644 --- a/hipify-clang/README.md +++ b/hipify-clang/README.md @@ -42,10 +42,9 @@ After applying all the matchers, the output HIP source is produced. 
`hipify-clang` requires: -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**9.0.1**](http://releases.llvm.org/download.html#9.0.1), the latest release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). +1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**10.0.0**](http://releases.llvm.org/download.html#10.0.0). -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.1 Update 2**](https://developer.nvidia.com/cuda-10.1-download-archive-base). -To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-downloads) please use the latest `LLVM` release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). +2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.2**](https://developer.nvidia.com/cuda-downloads). | **LLVM release version** | **CUDA latest supported version** | **Windows** | **Linux** | |:----------------------------------------------------------:|:------------------------------------------------------------------------:|:-----------:|:---------:| @@ -66,15 +65,15 @@ To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-download | [8.0.0](http://releases.llvm.org/download.html#8.0.0) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.0_bug_38811.zip)*
| + | | [8.0.1](http://releases.llvm.org/download.html#8.0.1) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.1_bug_38811.zip)*
| + | | [9.0.0](http://releases.llvm.org/download.html#9.0.0) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | -| [**9.0.1**](http://releases.llvm.org/download.html#9.0.1) | [**10.1**](https://developer.nvidia.com/cuda-10.1-download-archive-base) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | -| [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3) | [10.2](https://developer.nvidia.com/cuda-downloads) | + | + | +| [9.0.1](http://releases.llvm.org/download.html#9.0.1) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | +| [**10.0.0**](http://releases.llvm.org/download.html#10.0.0)| [**10.2**](https://developer.nvidia.com/cuda-downloads) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | `*` Download the patch and unpack it into your `LLVM` distributive directory; a few header files will be overwritten; rebuilding of `LLVM` is not needed. In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=f:\LLVM\9.0.1\dist` +[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=d:\LLVM\10.0.0\dist` ### hipify-clang: usage @@ -83,14 +82,14 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.1 -I /usr/local/cuda-10.1/samples/common/inc +./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.2 -I /usr/local/cuda-10.2/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you were compiling the input file. For example: ```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.1 -- -std=c++17 +./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.2 -- -std=c++17 ``` The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. @@ -158,7 +157,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro **LLVM 10.0.0 or newer:** -1. download [`LLVM project`](https://github.com/llvm/llvm-project/archive/llvmorg-10.0.0-rc3.tar.gz) sources; +1. download [`LLVM project`](https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.0/llvm-project-10.0.0.tar.xz) sources; 2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): **Linux**: @@ -193,19 +192,19 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro * Having multiple CUDA installations to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: - - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1` + - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2` - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"` + - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2"` 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32` + - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32` - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32` + - ***Windows***: `-DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32` 5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. @@ -213,7 +212,7 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro - ***Linux***: `-DCUDA_CUB_ROOT_DIR=/srv/git/CUB` - - ***Windows***: `-DCUDA_CUB_ROOT_DIR=f:/GIT/cub` + - ***Windows***: `-DCUDA_CUB_ROOT_DIR=d:/GIT/cub` 5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. 
@@ -221,21 +220,21 @@ Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build pro * Install `lit` into `python`: - - ***Linux***: `python /srv/git/LLVM/9.0.1/llvm/utils/lit/setup.py install` + - ***Linux***: `python /srv/git/LLVM/10.0.0/llvm/utils/lit/setup.py install` - - ***Windows***: `python f:/LLVM/9.0.1/llvm/utils/lit/setup.py install` + - ***Windows***: `python d:/LLVM/10.0.0/llvm/utils/lit/setup.py install` * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit` + - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit` - - ***Windows***: `-DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py` + - ***Windows***: `-DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py` * `FileCheck`: - - ***Linux***: copy from `/srv/git/LLVM/9.0.1/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Linux***: copy from `/srv/git/LLVM/10.0.0/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - ***Windows***: copy from `f:/LLVM/9.0.1/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` + - ***Windows***: copy from `d:/LLVM/10.0.0/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option @@ -249,7 +248,7 @@ On Linux the following configurations are tested: Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.5.32 -Ubuntu 16-18: LLVM 8.0.0 - 10.0.0-rc3, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 +Ubuntu 16-18: LLVM 8.0.0 - 10.0.0, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 Minimum build system requirements for the above configurations: @@ -262,11 +261,11 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/srv/git/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1 \ - 
-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32 \ + -DCMAKE_PREFIX_PATH=/srv/git/LLVM/10.0.0/dist \ + -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.2 \ + -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.2-v7.6.5.32 \ -DCUDA_CUB_ROOT_DIR=/srv/git/CUB \ - -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit \ + -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/10.0.0/build/bin/llvm-lit \ .. ``` *A corresponding successful output:* @@ -285,14 +284,14 @@ cmake -- Detecting CXX compiler ABI info - done -- Detecting CXX compile features -- Detecting CXX compile features - done --- Found LLVM 9.0.1: --- - CMake module path: /srv/git/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : /srv/git/LLVM/9.0.1/dist/include --- - Binary path : /srv/git/LLVM/9.0.1/dist/bin +-- Found LLVM 10.0.0: +-- - CMake module path: /srv/git/LLVM/10.0.0/dist/lib/cmake/llvm +-- - Include path : /srv/git/LLVM/10.0.0/dist/include +-- - Binary path : /srv/git/LLVM/10.0.0/dist/bin -- Linker detection: GNU ld -- Found PythonInterp: /usr/bin/python2.7 (found suitable version "2.7.12", minimum required is "2.7") -- Found lit: /usr/local/bin/lit --- Found FileCheck: /srv/git/LLVM/9.0.1/dist/bin/FileCheck +-- Found FileCheck: /srv/git/LLVM/10.0.0/dist/bin/FileCheck -- Looking for pthread.h -- Looking for pthread.h - found -- Looking for pthread_create @@ -302,7 +301,7 @@ cmake -- Looking for pthread_create in pthread -- Looking for pthread_create in pthread - found -- Found Threads: TRUE --- Found CUDA: /usr/local/cuda-10.1 (found version "10.1") +-- Found CUDA: /usr/local/cuda-10.2 (found version "10.2") -- Configuring done -- Generating done -- Build files have been written to: /srv/git/HIP/hipify-clang/build @@ -314,83 +313,85 @@ make test-hipify ```shell Running HIPify regression tests ======================================== -CUDA 10.1 - will be used for testing -LLVM 9.0.1 - will be used for testing +CUDA 10.2 - will be used for testing +LLVM 10.0.0 - will be used for testing x86_64 - Platform architecture Linux 
5.2.0 - Platform OS 64 - hipify-clang binary bitness 64 - python 2.7.12 binary bitness ======================================== --- Testing: 67 tests, 12 threads -- -PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 67) -PASS: hipify :: unit_tests/device/math_functions.cu (2 of 67) -PASS: hipify :: unit_tests/device/atomics.cu (3 of 67) -PASS: hipify :: unit_tests/device/device_symbols.cu (4 of 67) -PASS: hipify :: unit_tests/headers/headers_test_01.cu (5 of 67) -PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 67) -PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 67) -PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 67) -PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 67) -PASS: hipify :: unit_tests/headers/headers_test_06.cu (10 of 67) -PASS: hipify :: unit_tests/headers/headers_test_07.cu (11 of 67) -PASS: hipify :: unit_tests/headers/headers_test_10.cu (12 of 67) -PASS: hipify :: unit_tests/headers/headers_test_11.cu (13 of 67) -PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 67) -PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 67) -PASS: hipify :: unit_tests/headers/headers_test_09.cu (16 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (17 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (24 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (25 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 67) -PASS: 
hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 67) -PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 67) -PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (29 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (30 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (33 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (34 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (40 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (41 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (44 of 67) -PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (45 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (46 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (50 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (51 of 67) -PASS: hipify :: 
unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (52 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (53 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (54 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (55 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (56 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (57 of 67) -PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 67) -PASS: hipify :: unit_tests/samples/allocators.cu (59 of 67) -PASS: hipify :: unit_tests/samples/coalescing.cu (60 of 67) -PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (61 of 67) -PASS: hipify :: unit_tests/samples/axpy.cu (62 of 67) -PASS: hipify :: unit_tests/samples/intro.cu (63 of 67) -PASS: hipify :: unit_tests/samples/cudaRegister.cu (64 of 67) -PASS: hipify :: unit_tests/samples/square.cu (65 of 67) -PASS: hipify :: unit_tests/samples/static_shared_memory.cu (66 of 67) -PASS: hipify :: unit_tests/samples/vec_add.cu (67 of 67) -Testing Time: 3.07s - Expected Passes : 67 +-- Testing: 69 tests, 12 threads -- +PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 69) +PASS: hipify :: unit_tests/device/math_functions.cu (2 of 69) +PASS: hipify :: unit_tests/device/atomics.cu (3 of 69) +PASS: hipify :: unit_tests/headers/headers_test_01.cu (4 of 69) +PASS: hipify :: unit_tests/device/device_symbols.cu (5 of 69) +PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 69) +PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 69) +PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 69) +PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 69) +PASS: hipify :: unit_tests/headers/headers_test_07.cu (10 of 69) +PASS: hipify :: unit_tests/headers/headers_test_06.cu (11 of 69) +PASS: hipify :: unit_tests/headers/headers_test_11.cu (12 of 69) +PASS: hipify :: 
unit_tests/headers/headers_test_10.cu (13 of 69) +PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 69) +PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 69) +PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (16 of 69) +PASS: hipify :: unit_tests/headers/headers_test_09.cu (17 of 69) +PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 69) +PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (24 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (25 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 69) +PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 69) +PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 69) +PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (29 of 69) +PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (30 of 69) +PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (33 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (34 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 69) +PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 69) 
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (40 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (41 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (44 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (45 of 69) +PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (46 of 69) +PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu (50 of 69) +PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu (51 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (52 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (53 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (54 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (55 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (56 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (57 of 69) +PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (59 of 69) +PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (60 of 69) +PASS: hipify :: unit_tests/samples/allocators.cu (61 of 69) +PASS: hipify :: unit_tests/samples/coalescing.cu (62 of 69) +PASS: hipify :: unit_tests/samples/axpy.cu (63 of 69) +PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (64 of 69) +PASS: hipify :: 
unit_tests/samples/cudaRegister.cu (65 of 69) +PASS: hipify :: unit_tests/samples/intro.cu (66 of 69) +PASS: hipify :: unit_tests/samples/square.cu (67 of 69) +PASS: hipify :: unit_tests/samples/static_shared_memory.cu (68 of 69) +PASS: hipify :: unit_tests/samples/vec_add.cu (69 of 69) +Testing Time: 3.23s + Expected Passes : 69 [100%] Built target test-hipify ``` ### hipify-clang: Windows @@ -404,8 +405,8 @@ Testing Time: 3.07s | 7.0.0 - 7.1.0 | 9.2 | 7.6.5.32 | 2017.15.9.11 | 3.13.3 | 3.7.3 | | 8.0.0 - 8.0.1 | 10.0 | 7.6.5.32 | 2017.15.9.15 | 3.14.2 | 3.7.4 | | 9.0.0 - 9.0.1 | 10.1 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.0 | -| 10.0.0-rc1-rc3 | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.1 | -| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.5 | 3.8.2 | +| 10.0.0 | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | +| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.21, 2019.16.5.1 | 3.17.0 | 3.8.2 | *Building with testing support on `Windows 10` by `Visual Studio 16 2019`:* @@ -416,28 +417,28 @@ cmake -DHIPIFY_CLANG_TESTS=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=f:/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1" \ - -DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=f:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py \ + -DCMAKE_PREFIX_PATH=d:/LLVM/10.0.0/dist \ + -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2" \ + -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.2" \ + -DCUDA_DNN_ROOT_DIR=d:/CUDNN/cudnn-10.2-windows10-x64-v7.6.5.32 \ + -DCUDA_CUB_ROOT_DIR=d:/GIT/cub \ + -DLLVM_EXTERNAL_LIT=d:/LLVM/10.0.0/build/Release/bin/llvm-lit.py \ -Thost=x64 .. 
``` *A corresponding successful output:* ```shell --- Found LLVM 9.0.1: --- - CMake module path: F:/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : F:/LLVM/9.0.1/dist/include --- - Binary path : F:/LLVM/9.0.1/dist/bin --- Found PythonInterp: C:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") --- Found lit: C:/Program Files/Python38/Scripts/lit.exe --- Found FileCheck: F:/LLVM/9.0.1/dist/bin/FileCheck.exe --- Found CUDA: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1 (found version "10.1") +-- Found LLVM 10.0.0: +-- - CMake module path: d:/LLVM/10.0.0/dist/lib/cmake/llvm +-- - Include path : d:/LLVM/10.0.0/dist/include +-- - Binary path : d:/LLVM/10.0.0/dist/bin +-- Found PythonInterp: c:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") +-- Found lit: c:/Program Files/Python38/Scripts/lit.exe +-- Found FileCheck: d:/LLVM/10.0.0/dist/bin/FileCheck.exe +-- Found CUDA: c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2 (found version "10.2") -- Configuring done -- Generating done --- Build files have been written to: f:/HIP/hipify-clang/build +-- Build files have been written to: d:/HIP/hipify-clang/build ``` Run `Visual Studio 16 2019`, open the generated `hipify-clang.sln`, build project `test-hipify`. diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index ab07a10e93..7be0fd0f3d 100644 --- a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -545,9 +545,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ // 5.21. 
Occupancy // cudaOccupancyMaxActiveBlocksPerMultiprocessor - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags","", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSize {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index 1df1bb9cba..0f3997145e 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -49,8 +49,8 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseGetStream", {"hipsparseGetStream", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, {"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, @@ -218,27 +218,27 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseCcsrsm_analysis", {"hipsparseCcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZcsrsm_analysis", {"hipsparseZcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_bufferSizeExt", {"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE,}}, + {"cusparseCcsrsm2_bufferSizeExt", 
{"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseScsrsm2_solve", {"hipsparseScsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDcsrsm2_solve", {"hipsparseDcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCcsrsm2_solve", {"hipsparseCcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZcsrsm2_solve", {"hipsparseZcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseSbsrmm", {"hipsparseSbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseDbsrmm", {"hipsparseDbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, @@ -619,10 +619,10 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseChyb2dense", {"hipsparseChyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZhyb2dense", {"hipsparseZhyb2dense", 
"", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE}}, {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE}}, diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp index 1d3fe28c62..aae85a50d3 100644 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp @@ -41,8 +41,8 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP{ {"csrsv2Info", {"csrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, {"csrsv2Info_t", {"csrsv2Info_t", "", CONV_TYPE, API_SPARSE}}, - {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrsm2Info_t", {"csrsm2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, + {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE}}, + {"csrsm2Info_t", {"csrsm2Info_t", "", CONV_TYPE, API_SPARSE}}, {"bsrsv2Info", {"bsrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, {"bsrsv2Info_t", {"bsrsv2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, @@ -88,9 +88,9 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP{ {"CUSPARSE_ACTION_SYMBOLIC", {"HIPSPARSE_ACTION_SYMBOLIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, {"CUSPARSE_ACTION_NUMERIC", {"HIPSPARSE_ACTION_NUMERIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"cusparseDirection_t", {"hipsparseDirection_t", "", 
CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDirection_t", {"hipsparseDirection_t", "", CONV_TYPE, API_SPARSE}}, + {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, + {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, {"cusparseHybPartition_t", {"hipsparseHybPartition_t", "", CONV_TYPE, API_SPARSE}}, {"CUSPARSE_HYB_PARTITION_AUTO", {"HIPSPARSE_HYB_PARTITION_AUTO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index e6549dde0d..0a775df275 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -128,7 +128,7 @@ __device__ static int __mul24(int x, int y); __device__ static long long int __mul64hi(long long int x, long long int y); __device__ static int __mulhi(int x, int y); __device__ static int __rhadd(int x, int y); -__device__ static unsigned int __sad(int x, int y, int z); +__device__ static unsigned int __sad(int x, int y,unsigned int z); __device__ static unsigned int __uhadd(unsigned int x, unsigned int y); __device__ static int __umul24(unsigned int x, unsigned int y); __device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); @@ -199,7 +199,7 @@ __device__ static inline int __rhadd(int x, int y) { int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline unsigned int __sad(int x, int y, int z) { +__device__ static inline unsigned int __sad(int x, int y, unsigned int z) { return x > y ? 
x - y + z : y - x + z; } __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { @@ -230,7 +230,7 @@ __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { return (x + y + 1) >> 1; } __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { - return __ockl_sad_u32(x, y, z); + return __ockl_sadd_u32(x, y, z); } __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } @@ -563,7 +563,7 @@ long __shfl_xor(long var, int lane_mask, int width = warpSize) return tmp1; #else static_assert(sizeof(long) == sizeof(int), ""); - return static_cast(__shfl_down(static_cast(var), lane_mask, width)); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); #endif } __device__ diff --git a/include/hip/hcc_detail/device_library_decls.h b/include/hip/hcc_detail/device_library_decls.h index ac35823cd2..2eadb86774 100644 --- a/include/hip/hcc_detail/device_library_decls.h +++ b/include/hip/hcc_detail/device_library_decls.h @@ -44,7 +44,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); -extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); +extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint); extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); @@ -72,6 +72,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank( extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); +extern "C" __device__ 
void __ockl_atomic_add_noret_f32(float*, float); // Introduce local address space #define __local __attribute__((address_space(3))) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index cf4422070f..a2ee601e3e 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -37,14 +37,15 @@ THE SOFTWARE. hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps); -hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps); - -hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags, hip_impl::program_state& ps); - - +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps); +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps); #pragma GCC visibility push(hidden) @@ -192,22 +193,24 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, template inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream) { - +hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMem, + hipStream_t stream) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return ihipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDimX, kernelParams, sharedMemBytes, stream, ps); + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, args, 
sharedMem, stream, ps); } inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags) { +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); } #pragma GCC visibility pop diff --git a/include/hip/hcc_detail/hip_atomic.h b/include/hip/hcc_detail/hip_atomic.h index 263f639e96..d00ebcdabb 100644 --- a/include/hip/hcc_detail/hip_atomic.h +++ b/include/hip/hcc_detail/hip_atomic.h @@ -73,6 +73,14 @@ float atomicAdd(float* address, float val) return __uint_as_float(r); } + +__device__ +inline +void atomicAddNoRet(float* address, float val) +{ + __ockl_atomic_add_noret_f32(address, val); +} + __device__ inline double atomicAdd(double* address, double val) diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 3fa18dae2e..6fa86e94b9 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -1268,6 +1268,13 @@ THE SOFTWARE. static_cast<__half_raw>(x).data + static_cast<__half_raw>(y).data}; } + inline + __device__ + __half __habs(__half x) + { + return __half_raw{ + __ocml_fabs_f16(static_cast<__half_raw>(x).data)}; + } inline __device__ __half __hsub(__half x, __half y) @@ -1334,6 +1341,13 @@ THE SOFTWARE. 
static_cast<__half2_raw>(x).data + static_cast<__half2_raw>(y).data}; } + inline + __device__ + __half2 __habs2(__half2 x) + { + return __half2_raw{ + __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)}; + } inline __device__ __half2 __hsub2(__half2 x, __half2 y) diff --git a/include/hip/hcc_detail/hip_fp16_math_fwd.h b/include/hip/hcc_detail/hip_fp16_math_fwd.h index eeb617c40b..95403e6ca8 100644 --- a/include/hip/hcc_detail/hip_fp16_math_fwd.h +++ b/include/hip/hcc_detail/hip_fp16_math_fwd.h @@ -38,6 +38,7 @@ extern "C" __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); @@ -58,6 +59,7 @@ extern "C" #endif __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index a3a0963ba9..fdb61e70d3 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -504,9 +504,14 @@ hc_get_workitem_absolute_id(int dim) #define __CUDA__ #include <__clang_cuda_math_forward_declares.h> #include <__clang_cuda_complex_builtins.h> -#include -#include -#include +// Workaround for using libc++ with HIP-Clang. +// The following headers requires clang include path before standard C++ include path. +// However libc++ include path requires to be before clang include path. 
+// To workaround this, we pass -isystem with the parent directory of clang include +// path instead of the clang include path itself. +#include +#include +#include #undef __CUDA__ #pragma pop_macro("__CUDA__") #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 67bd2486d0..206a2e5835 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -55,7 +55,7 @@ THE SOFTWARE. #define DEPRECATED(msg) __attribute__ ((deprecated(msg))) #endif // !defined(_MSC_VER) -#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list" +#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md" #if defined(__HCC__) && (__hcc_workweek__ < 16155) #error("This version of HIP requires a newer version of HCC."); diff --git a/include/hip/hcc_detail/hip_vector_types.h b/include/hip/hcc_detail/hip_vector_types.h index 39457795ae..19259a3657 100644 --- a/include/hip/hcc_detail/hip_vector_types.h +++ b/include/hip/hcc_detail/hip_vector_types.h @@ -34,7 +34,7 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -#if !defined(_MSC_VER) || __clang__ +#if defined(__has_attribute) #if __has_attribute(ext_vector_type) #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) #else @@ -694,7 +694,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator-() noexcept + HIP_vector_type operator-() const noexcept { auto tmp(*this); tmp.data = -tmp.data; @@ -705,7 +705,7 @@ THE SOFTWARE. 
typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator~() noexcept + HIP_vector_type operator~() const noexcept { HIP_vector_type r{*this}; r.data = ~r.data; @@ -1241,7 +1241,9 @@ DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); -#else // defined(_MSC_VER) +#else // !defined(__has_attribute) + +#if defined(_MSC_VER) #include #include #include @@ -1347,5 +1349,92 @@ typedef union { double4 data; } double3; typedef union { __m256d data[2]; } double8; typedef union { __m256d data[4]; } double16; +#else // !defined(_MSC_VER) + +typedef union { char data; } char1; +typedef union { char data[2]; } char2; +typedef union { char data[4]; } char4; +typedef union { char data[8]; } char8; +typedef union { char data[16]; } char16; +typedef union { char4 data; } char3; + +typedef union { unsigned char data; } uchar1; +typedef union { unsigned char data[2]; } uchar2; +typedef union { unsigned char data[4]; } uchar4; +typedef union { unsigned char data[8]; } uchar8; +typedef union { unsigned char data[16]; } uchar16; +typedef union { uchar4 data; } uchar3; + +typedef union { short data; } short1; +typedef union { short data[2]; } short2; +typedef union { short data[4]; } short4; +typedef union { short data[8]; } short8; +typedef union { short data[16]; } short16; +typedef union { short4 data; } short3; + +typedef union { unsigned short data; } ushort1; +typedef union { unsigned short data[2]; } ushort2; +typedef union { unsigned short data[4]; } ushort4; +typedef union { unsigned short data[8]; } ushort8; +typedef union { unsigned short data[16]; } ushort16; +typedef union { ushort4 data; } ushort3; + +typedef union { int data; } int1; +typedef union { int data[2]; } int2; +typedef union { int data[4]; } int4; +typedef union { int data[8]; } int8; 
+typedef union { int data[16]; } int16; +typedef union { int4 data; } int3; + +typedef union { unsigned int data; } uint1; +typedef union { unsigned int data[2]; } uint2; +typedef union { unsigned int data[4]; } uint4; +typedef union { unsigned int data[8]; } uint8; +typedef union { unsigned int data[16]; } uint16; +typedef union { uint4 data; } uint3; + +typedef union { long data; } long1; +typedef union { long data[2]; } long2; +typedef union { long data[4]; } long4; +typedef union { long data[8]; } long8; +typedef union { long data[16]; } long16; +typedef union { long4 data; } long3; + +typedef union { unsigned long data; } ulong1; +typedef union { unsigned long data[2]; } ulong2; +typedef union { unsigned long data[4]; } ulong4; +typedef union { unsigned long data[8]; } ulong8; +typedef union { unsigned long data[16]; } ulong16; +typedef union { ulong4 data; } ulong3; + +typedef union { long long data; } longlong1; +typedef union { long long data[2]; } longlong2; +typedef union { long long data[4]; } longlong4; +typedef union { long long data[8]; } longlong8; +typedef union { long long data[16]; } longlong16; +typedef union { longlong4 data; } longlong3; + +typedef union { unsigned long long data; } ulonglong1; +typedef union { unsigned long long data[2]; } ulonglong2; +typedef union { unsigned long long data[4]; } ulonglong4; +typedef union { unsigned long long data[8]; } ulonglong8; +typedef union { unsigned long long data[16]; } ulonglong16; +typedef union { ulonglong4 data; } ulonglong3; + +typedef union { float data; } float1; +typedef union { float data[2]; } float2; +typedef union { float data[4]; } float4; +typedef union { float data[8]; } float8; +typedef union { float data[16]; } float16; +typedef union { float4 data; } float3; + +typedef union { double data; } double1; +typedef union { double data[2]; } double2; +typedef union { double data[4]; } double4; +typedef union { double data[8]; } double8; +typedef union { double data[16]; } double16; 
+typedef union { double4 data; } double3; + #endif // defined(_MSC_VER) +#endif // defined(__has_attribute) #endif diff --git a/include/hip/hcc_detail/hiprtc.h b/include/hip/hcc_detail/hiprtc.h index 624f1ea157..ec9c85716a 100644 --- a/include/hip/hcc_detail/hiprtc.h +++ b/include/hip/hcc_detail/hiprtc.h @@ -28,6 +28,8 @@ extern "C" { #include +#pragma GCC visibility push (default) + enum hiprtcResult { HIPRTC_SUCCESS = 0, HIPRTC_ERROR_OUT_OF_MEMORY = 1, @@ -79,6 +81,8 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#pragma GCC visibility pop + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/include/hip/hcc_detail/host_defines.h b/include/hip/hcc_detail/host_defines.h index 11bd577f08..b21946e99f 100644 --- a/include/hip/hcc_detail/host_defines.h +++ b/include/hip/hcc_detail/host_defines.h @@ -60,7 +60,7 @@ THE SOFTWARE. */ // _restrict is supported by the compiler #define __shared__ tile_static -#define __constant__ __attribute__((hc)) +#define __constant__ __attribute__((hc, annotate("__HIP_constant__"))) #elif defined(__clang__) && defined(__HIP__) diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index cf6a64ad65..b0974aeef6 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -321,7 +321,6 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeIntegrated, ///< iGPU hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices - hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D images in image elements diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h 
index 6e0d02d0c0..d9eb3e4146 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -186,6 +186,7 @@ typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_t; typedef struct cudaArray* hipArray_const_t; typedef struct cudaFuncAttributes hipFuncAttributes; +typedef struct cudaLaunchParams hipLaunchParams; #define hipFunction_attribute CUfunction_attribute #define hip_Memcpy2D CUDA_MEMCPY2D #define hipMemcpy3DParms cudaMemcpy3DParms @@ -860,7 +861,7 @@ inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, } -inline hipError_t hipMemcpyWithStream(void* dst, const void* src, +inline static hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream) { cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, @@ -1134,6 +1135,10 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev p_prop->integrated = cdprop.integrated; p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; + p_prop->cooperativeMultiDeviceUnmatchedFunc = 0; + p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0; p_prop->maxTexture1D = cdprop.maxTexture1D; p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0]; @@ -1271,6 +1276,12 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att case hipDeviceAttributeEccEnabled: cdattr = cudaDevAttrEccEnabled; break; + case hipDeviceAttributeCooperativeLaunch: + cdattr = cudaDevAttrCooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + cdattr = cudaDevAttrCooperativeMultiDeviceLaunch; + break; default: return hipCUDAErrorTohipError(cudaErrorInvalidValue); } @@ -1679,6 +1690,17 @@ inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, 
hipArray_ return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); } +inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + +inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)); +} #ifdef __cplusplus } @@ -1686,6 +1708,17 @@ inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_ #ifdef __CUDACC__ +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + T func, + int blockSize, + size_t dynamicSMemSize) { + cudaError_t cerror; + cerror = + cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); + return hipCUDAErrorTohipError(cerror); +} + template inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, size_t dynamicSMemSize = 0, @@ -1742,6 +1775,14 @@ template inline static hipChannelFormatDesc hipCreateChannelDesc() { return cudaCreateChannelDesc(); } + +template +inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + #endif //__CUDACC__ #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H diff --git a/lpl_ca/CMakeLists.txt b/lpl_ca/CMakeLists.txt index ac01a6a0ab..f626b88d89 100644 --- a/lpl_ca/CMakeLists.txt +++ b/lpl_ca/CMakeLists.txt @@ -14,7 +14,7 @@ install(TARGETS lpl RUNTIME DESTINATION bin) 
#-------------------------------------LPL--------------------------------------# #-------------------------------------CA---------------------------------------# -add_executable(ca ca.cpp ${PROJECT_SOURCE_DIR}/src/code_object_bundle.cpp) +add_executable(ca ca.cpp) set_target_properties( ca PROPERTIES CXX_STANDARD 11 diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp index db63f02498..2d691cd38a 100644 --- a/lpl_ca/ca.hpp +++ b/lpl_ca/ca.hpp @@ -2,7 +2,7 @@ #include "common.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "../src/code_object_bundle.inl" #include "clara/clara.hpp" diff --git a/packaging/hip-base.txt b/packaging/hip-base.txt index 4ff936dba4..fc8becf84f 100644 --- a/packaging/hip-base.txt +++ b/packaging/hip-base.txt @@ -25,16 +25,15 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0), llvm-amdgpu") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_base") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0)") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-base") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_base") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_base") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") -set(CPACK_RPM_PACKAGE_REQUIRES "perl >= 5.0, llvm-amdgpu") +set(CPACK_RPM_PACKAGE_REQUIRES "perl >= 5.0") set(CPACK_RPM_PACKAGE_OBSOLETES "hip_base") set(CPACK_RPM_PACKAGE_CONFLICTS "hip_base") set(CPACK_BINARY_RPM "ON") diff --git a/packaging/hip-doc.txt b/packaging/hip-doc.txt index d97ddc7d3a..41db246d31 100644 --- a/packaging/hip-doc.txt +++ b/packaging/hip-doc.txt @@ -32,9 +32,8 @@ 
set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_doc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-doc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_doc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_doc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/packaging/hip-hcc.txt b/packaging/hip-hcc.txt index 21e138e1ed..6a04ebffbd 100644 --- a/packaging/hip-hcc.txt +++ b/packaging/hip-hcc.txt @@ -37,9 +37,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), comgr (>= 1.1)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_hcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-hcc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_hcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_hcc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-nvcc.txt b/packaging/hip-nvcc.txt index dc36b628c7..4b11939609 100644 --- a/packaging/hip-nvcc.txt +++ b/packaging/hip-nvcc.txt @@ -19,9 +19,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), cuda (>= 7.5)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_nvcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-nvcc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_nvcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_nvcc") set(CPACK_BINARY_RPM "ON") 
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-samples.txt b/packaging/hip-samples.txt index 737f048d8f..1f3e088153 100644 --- a/packaging/hip-samples.txt +++ b/packaging/hip-samples.txt @@ -19,10 +19,9 @@ set(CPACK_PACKAGE_VERSION_PATCH @HIP_VERSION_PATCH@) set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-vdi (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_samples") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-samples") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_samples") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_samples") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/packaging/hip-targets.cmake b/packaging/hip-targets.cmake index ac72419f52..6f6957f4d6 100644 --- a/packaging/hip-targets.cmake +++ b/packaging/hip-targets.cmake @@ -114,8 +114,6 @@ set_target_properties(hip::device PROPERTIES else() set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES "hip::host" - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" ) endif() diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index c80e4aed6f..eefdcf69fb 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -27,11 +27,7 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") -if(@COMPILE_HIP_ATP_MARKER@) - 
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), rocm-profiler, comgr (>= 1.1)") -else() - set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1)") -endif() +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu") set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi") set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi") @@ -41,11 +37,7 @@ set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) -if(@COMPILE_HIP_ATP_MARKER@) - set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, rocm-profiler, comgr >= 1.1") -else() - set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1") -endif() +set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu") set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}") set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi") set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi") diff --git a/samples/0_Intro/module_api/defaultDriver.cpp b/samples/0_Intro/module_api/defaultDriver.cpp index ea36aabcf4..af8b413ac2 100644 --- a/samples/0_Intro/module_api/defaultDriver.cpp +++ b/samples/0_Intro/module_api/defaultDriver.cpp @@ -80,8 +80,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/launchKernelHcc.cpp b/samples/0_Intro/module_api/launchKernelHcc.cpp 
index 38cf0d414c..90e569c5bc 100644 --- a/samples/0_Intro/module_api/launchKernelHcc.cpp +++ b/samples/0_Intro/module_api/launchKernelHcc.cpp @@ -107,8 +107,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/runKernel.cpp b/samples/0_Intro/module_api/runKernel.cpp index a011b42666..1093b0dd54 100644 --- a/samples/0_Intro/module_api/runKernel.cpp +++ b/samples/0_Intro/module_api/runKernel.cpp @@ -99,8 +99,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api_global/runKernel.cpp b/samples/0_Intro/module_api_global/runKernel.cpp index 3a2804b7a2..4a2d49144c 100644 --- a/samples/0_Intro/module_api_global/runKernel.cpp +++ b/samples/0_Intro/module_api_global/runKernel.cpp @@ -154,8 +154,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index e17f19675a..14faa7671b 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -56,6 +56,7 @@ void printCompilerInfo() { #endif } +double bytesToKB(size_t s) { return (double)s / (1024.0); } double bytesToGB(size_t s) { return (double)s / (1024.0 * 1024.0 * 1024.0); } #define printLimit(w1, limit, units) \ @@ -97,7 +98,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) - << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl; + << bytesToKB(props.maxSharedMemoryPerMultiProcessor) << " KB" << endl; cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl; cout << setw(w1) << 
"sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; diff --git a/src/code_object_bundle.cpp b/src/code_object_bundle.cpp deleted file mode 100644 index feef90a61a..0000000000 --- a/src/code_object_bundle.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "../include/hip/hcc_detail/code_object_bundle.hpp" - -#include - -#include -#include -#include -#include -#include - -using namespace std; - -// CREATORS -hip_impl::Bundled_code_header::Bundled_code_header(const vector& x) - : Bundled_code_header{x.cbegin(), x.cend()} {} - -hip_impl::Bundled_code_header::Bundled_code_header( - const void* p) { // This is a pretty terrible interface, useful only because - // hipLoadModuleData is so poorly specified (for no fault of its own). - if (!p) return; - - if (!valid(*static_cast(p))) return; - auto ph = static_cast(p); - - size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); - auto pb = static_cast(p) + sizeof(Header_); - auto n = ph->bundle_cnt_; - while (n--) { - sz += reinterpret_cast(pb)->bundle_sz; - pb += sizeof(Bundled_code::Header); - } - - read(static_cast(p), static_cast(p) + sz, *this); -} diff --git a/include/hip/hcc_detail/code_object_bundle.hpp b/src/code_object_bundle.inl similarity index 86% rename from include/hip/hcc_detail/code_object_bundle.hpp rename to src/code_object_bundle.inl index 77e0d706d6..596ac60661 100644 --- a/include/hip/hcc_detail/code_object_bundle.hpp +++ b/src/code_object_bundle.inl @@ -92,10 +92,6 @@ struct Bundled_code { #define magic_string_ "__CLANG_OFFLOAD_BUNDLE__" -#ifdef __GNUC__ -#pragma GCC visibility push (default) -#endif - class Bundled_code_header { // DATA - STATICS static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1; @@ -167,8 +163,26 @@ class Bundled_code_header { Bundled_code_header() = default; template Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l); - explicit Bundled_code_header(const std::vector& blob); - explicit 
Bundled_code_header(const void* maybe_blob); + explicit Bundled_code_header(const std::vector& blob) + : Bundled_code_header{blob.cbegin(), blob.cend()} {} + explicit Bundled_code_header(const void* maybe_blob) { + // This is a pretty terrible interface, useful only because + // hipLoadModuleData is so poorly specified (for no fault of its own). + if (!maybe_blob) return; + + if (!valid(*static_cast(maybe_blob))) return; + auto ph = static_cast(maybe_blob); + + size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); + auto pb = static_cast(maybe_blob) + sizeof(Header_); + auto n = ph->bundle_cnt_; + while (n--) { + sz += reinterpret_cast(pb)->bundle_sz; + pb += sizeof(Bundled_code::Header); + } + + read(static_cast(maybe_blob), static_cast(maybe_blob) + sz, *this); + } Bundled_code_header(const Bundled_code_header&) = default; Bundled_code_header(Bundled_code_header&&) = default; ~Bundled_code_header() = default; @@ -180,10 +194,6 @@ class Bundled_code_header { size_t bundled_code_size = 0; }; -#ifdef __GNUC__ -#pragma GCC visibility pop -#endif - // CREATORS template Bundled_code_header::Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l) diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index e8f3e86881..75c13038a5 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -51,7 +51,7 @@ __hipRegisterFatBinary(const void* data) return nullptr; } - auto modules = new std::vector{g_deviceCnt}; + auto modules = new std::vector(g_deviceCnt); if (!modules) { return nullptr; } @@ -136,7 +136,7 @@ extern "C" void __hipRegisterFunction( int* wSize) { HIP_INIT_API(NONE, modules, hostFunction, deviceFunction, deviceName); - std::vector functions{g_deviceCnt}; + std::vector functions(g_deviceCnt); assert(modules && modules->size() >= g_deviceCnt); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 1bbdb10bbc..e5797727ae 100644 --- a/src/hip_device.cpp +++ 
b/src/hip_device.cpp @@ -310,6 +310,18 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop->cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop->cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop->cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop->cooperativeMultiDeviceUnmatchedBlockDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop->cooperativeMultiDeviceUnmatchedSharedMem; + break; case hipDeviceAttributeMaxPitch: *pi = prop->memPitch; break; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index c9688408c8..807dcc7391 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -677,7 +677,7 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &(p_prop->sharedMemPerBlock)); + &(p_prop->maxSharedMemoryPerMultiProcessor)); break; default: break; @@ -835,10 +835,8 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { hsa_region_t* am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); - // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory - // size, and also equal to the group pool size. - prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; + // Current GPUs allow a workgroup to use all of LDS in a CU, so these two are equal. 
+ prop->sharedMemPerBlock = prop->maxSharedMemoryPerMultiProcessor; // Get Max memory clock frequency err = @@ -897,9 +895,16 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { prop->integrated = 1; } - // Enable the cooperative group for gfx9+ - prop->cooperativeLaunch = (prop->gcnArch < 900) ? 0 : 1; - prop->cooperativeMultiDeviceLaunch = (prop->gcnArch < 900) ? 0 : 1; + // Enable the cooperative group for GPUs that support all the required features + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES, + &prop->cooperativeLaunch); + DeviceErrorCheck(err); + prop->cooperativeMultiDeviceLaunch = prop->cooperativeLaunch; + + prop->cooperativeMultiDeviceUnmatchedFunc = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedGridDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedBlockDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedSharedMem = prop->cooperativeMultiDeviceLaunch; err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, &prop->maxTexture1D); @@ -1515,20 +1520,6 @@ hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) { return e; } -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) { - hipError_t e = hipSuccess; - - // Synchronize stream - tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n", - ToString(cb->_stream).c_str()); - GET_TLS(); - e = ihipStreamSynchronize(tls, cb->_stream); - - // Call registered callback function - cb->_callback(cb->_stream, e, cb->_userData); - delete cb; -} - //--- // Get the stream to use for a command submission. // @@ -1619,7 +1610,9 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, // Allows runtime to track some information about the stream. 
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired) { - stream = ihipSyncAndResolveStream(stream, lockAcquired); + if (stream == nullptr || stream != stream->getCtx()->_defaultStream) { + stream = ihipSyncAndResolveStream(stream, lockAcquired); + } lp->grid_dim.x = grid.x; lp->grid_dim.y = grid.y; lp->grid_dim.z = grid.z; diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 104fd910a8..993dc31dfc 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -654,19 +654,6 @@ class ihipStream_t { }; -//---- -// Internal structure for stream callback handler -class ihipStreamCallback_t { - public: - ihipStreamCallback_t(hipStream_t stream, hipStreamCallback_t callback, void* userData) - : _stream(stream), _callback(callback), _userData(userData) { - }; - hipStream_t _stream; - hipStreamCallback_t _callback; - void* _userData; -}; - - //---- // Internal event structure: enum hipEventStatus_t { @@ -980,7 +967,6 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb); /** * @brief Copies the memory address and size of symbol @p symbolName diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 8159f22a97..e7bc348951 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -309,31 +309,52 @@ void generic_copy(void* __restrict dst, const void* __restrict src, size_t n, if (di.size == is_cpu_owned) return d2h_copy(dst, src, n, si); if (si.size == is_cpu_owned) return h2d_copy(dst, src, n, di); - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = 
hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + // If devices do not have access then fallback mechanism will be used + // copy will be slower + throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); } inline void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n, hipMemcpyKind k) { + auto si{info(src)}; + auto di{info(dst)}; + + if (!is_large_BAR){ + // Pointer info takes presidence over hipMemcpyKind + // if there is mismatch b/w Memcpy kind and dst/src pointer + // E.g. dst(host pointer),src(device pointer) and hipMemcpyKind set as hipMemcpyHostToDevice + if (di.size == is_cpu_owned && si.size == is_cpu_owned) + k = hipMemcpyHostToHost; + else if (si.size == is_cpu_owned && di.size != is_cpu_owned) + k = hipMemcpyHostToDevice; + else if (di.size == is_cpu_owned && si.size != is_cpu_owned) + k = hipMemcpyDeviceToHost; + else + k = hipMemcpyDeviceToDevice; + } switch (k) { case hipMemcpyHostToHost: std::memcpy(dst, src, n); break; - case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, info(dst)); - case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, info(src)); + case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, di); + case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, si); case hipMemcpyDeviceToDevice: { - const auto di{info(dst)}; - const auto si{info(src)}; - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + + // If devices do not have access then fallback mechanism will be used + // copy will be slower + 
throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); + break; } - default: return generic_copy(dst, src, n, info(dst), info(src)); + default: return generic_copy(dst, src, n, di, si); } } @@ -478,6 +499,10 @@ void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool s hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hip_status = hipSuccess; + if (sizeBytes == 0) { + return hipSuccess; + } + if (HIP_SYNC_HOST_ALLOC) { hipDeviceSynchronize(); } @@ -485,10 +510,6 @@ hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned i auto ctx = ihipGetTlsDefaultCtx(); if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - hip_status = hipSuccess; - // TODO - should size of 0 return err or be siliently ignored? } else { unsigned trueFlags = flags; if (flags == hipHostMallocDefault) { @@ -673,14 +694,15 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + auto ctx = ihipGetTlsDefaultCtx(); // return NULL pointer when malloc size is 0 if ( nullptr == ctx || nullptr == ptr) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; } else { auto device = ctx->getWriteableDevice(); *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/, @@ -700,14 +722,15 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_SET_DEVICE(); #if (__hcc_workweek__ >= 19115) + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + hipError_t hip_status = hipSuccess; auto ctx = ihipGetTlsDefaultCtx(); - // return NULL pointer when malloc size is 0 - if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; - } else if 
((ctx == nullptr) || (ptr == nullptr)) { + if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; } else { unsigned amFlags = 0; @@ -736,6 +759,9 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags); HIP_SET_DEVICE(); + if (sizeBytes == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags); return ihipLogStatus(hip_status); @@ -744,6 +770,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) { HIP_INIT_SPECIAL_API(hipMallocManaged, (TRACE_MEM), devPtr, size, flags); HIP_SET_DEVICE(); + if (size == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; if(flags != hipMemAttachGlobal) hip_status = hipErrorInvalidValue; @@ -1224,6 +1253,7 @@ hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1246,6 +1276,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1269,6 +1300,7 @@ hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1301,6 +1333,7 @@ hipError_t 
hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1592,6 +1625,7 @@ hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bo srcXoffset = p->srcPos.x; srcYoffset = p->srcPos.y; srcZoffset = p->srcPos.z; + if (copyWidth == 0) return hipSuccess; if (p->dstArray != nullptr) { if ((p->dstArray->isDrv == true) ||( p->dstPtr.ptr!= nullptr)){ return hipErrorInvalidValue; @@ -1933,6 +1967,7 @@ hipError_t getLockedPointer(void *hostPtr, size_t dataLen, void **devicePtrPtr) // TODO - review and optimize hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + if (height == 0 || width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); @@ -1989,6 +2024,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, hipError_t ihipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + if (height == 0 || width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipError_t e = hipSuccess; int isLockedOrD2D = 0; @@ -2043,6 +2079,7 @@ hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t size_t height, size_t srcXOffsetInBytes, size_t srcYOffset, size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind, hipStream_t stream, bool isAsync) { + if (height == 0 || width == 0) return hipSuccess; if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){ return hipErrorInvalidValue; } else 
if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){ @@ -2061,6 +2098,7 @@ hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool if (pCopy == nullptr) { return hipErrorInvalidValue; } + if (pCopy->Height == 0 || pCopy->WidthInBytes == 0) return hipSuccess; void* dst; const void* src; size_t spitch = pCopy->srcPitch; size_t dpitch = pCopy->dstPitch; @@ -2140,6 +2178,7 @@ hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){ HIP_INIT_SPECIAL_API(hipMemcpy2DFromArrayAsync, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind, stream); size_t byteSize; + if (height == 0 || width == 0) return ihipLogStatus(hipSuccess); if(src) { switch (src->desc.f) { case hipChannelFormatKindSigned: @@ -2239,8 +2278,6 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { auto device = ctx->getWriteableDevice(); if (total) { *total = device->_props.totalGlobalMem; - } else { - e = hipErrorInvalidValue; } if (free) { @@ -2263,8 +2300,6 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { } else { return ihipLogStatus(hipErrorInvalidValue); } - } else { - e = hipErrorInvalidValue; } } else { diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 0a7348a3a2..1e467899c6 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -50,7 +50,7 @@ THE SOFTWARE. #include #include #include -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "hip_fatbin.h" // TODO Use Pool APIs from HCC to get memory regions. 
@@ -140,7 +140,7 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, bool isStreamLocked = 0, - void** impCoopParams = 0) { + void** impCoopParams = 0, hc::accelerator_view* coopAV = 0) { using namespace hip_impl; auto ctx = ihipGetTlsDefaultCtx(); @@ -192,8 +192,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global if (impCoopParams) { const auto p{static_cast(*impCoopParams)}; // The sixth index is for multi-grid synchronization - kernargs.insert((kernargs.cend() - padSize - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT, - p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT); + copy(p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT, + (kernargs.end() - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT); } /* @@ -245,6 +245,10 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global hc::completion_future cf; + if (coopAV) { + lp.av = coopAV; + } + lp.av->dispatch_hsa_kernel(&aql, kernargs.data(), kernargs.size(), (startEvent || stopEvent) ? 
&cf : nullptr #if (__hcc_workweek__ > 17312) @@ -326,22 +330,18 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return hipErrorInvalidValue; } - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - if (kds == nullptr) { - return hipErrorNotInitialized; - } + std::vector kds(numDevices,0); // prepare all kernel descriptors for each device as all streams will be locked in the next loop for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; if (lp.stream == nullptr) { - free(kds); return hipErrorNotInitialized; } kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), hip_impl::target_agent(lp.stream)); + if (kds[i] == nullptr) { - free(kds); return hipErrorInvalidValue; } if (!kds[i]->_kernarg_layout.empty()) continue; @@ -396,8 +396,6 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList #endif } - free(kds); - return result; } @@ -409,6 +407,90 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); } +void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) +{ + if (f->_is_code_object_v3) { + const auto header = reinterpret_cast(f->_header); + // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) + *usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; + // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) + *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; + *usedLDS = header->group_segment_fixed_size; + } + else { + const auto header = f->_header; + // VGPRs granularity is 4 + *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; + // adding 2 to take into 
account the 2 VCC registers & handle the granularity of 16 + *usedSGPRS = header->wavefront_sgpr_count + 2; + *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; + *usedLDS = header->workgroup_group_segment_byte_size; + } +} + +static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) +{ + using namespace hip_impl; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx == nullptr) { + return hipErrorInvalidDevice; + } + if (numBlocks == nullptr) { + return hipErrorInvalidValue; + } + + hipDeviceProp_t prop{}; + ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + + if (blockSize > prop.maxThreadsPerBlock) { + *numBlocks = 0; + return hipSuccess; + } + + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); + + // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + + const size_t simdPerCU = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; + + size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; + + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); + size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? maxWavesPerSimd + : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); + + // Calculate blocks occupancy per CU based on VGPR usage + *numBlocks = vgprs_alu_occupancy / numWavefronts; + + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? 
maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); + + size_t total_used_lds = usedLDS + dynSharedMemPerBlk; + if (total_used_lds != 0) { + // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) + size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; + *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); + } + + return hipSuccess; +} + namespace { // kernel for initializing GWS // nwm1 is the total number of work groups minus 1 @@ -417,25 +499,28 @@ __global__ void init_gws(uint nwm1) { } } -__attribute__((visibility("default"))) hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, - dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, + dim3 blockDim, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20093) hipError_t result; - if ((f == nullptr) || (stream == nullptr) || (kernelParams == nullptr)) { + if (f == nullptr || kernelParams == nullptr) { return hipErrorNotInitialized; } - if (!stream->getDevice()->_props.cooperativeLaunch) { + stream = ihipSyncAndResolveStream(stream); + + if (!stream->getDevice()->_props.cooperativeLaunch || + blockDim.x * blockDim.y * blockDim.z > stream->getDevice()->_props.maxThreadsPerBlock) { return hipErrorInvalidConfiguration; } - size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDimX.x; - size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDimX.y; - size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDimX.z; + size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDim.x; + size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDim.y; + size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDim.z; if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || 
globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; @@ -469,28 +554,49 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, kd->_kernarg_layout = *reinterpret_cast>*>(kargs.getHandle()); + GET_TLS(); + uint32_t numBlocksPerSm = 0; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kd, + blockDim.x * blockDim.y * blockDim.z, sharedMemBytes); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (gridDim.x * gridDim.y * gridDim.z > maxActiveBlocks) { + return hipErrorCooperativeLaunchTooLarge; + } void *gwsKernelParam[1]; // calculate total number of work groups minus 1 for the main kernel uint nwm1 = (gridDim.x * gridDim.y * gridDim.z) - 1; gwsKernelParam[0] = &nwm1; - LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) - streamCrit->_av.acquire_locked_hsa_queue(); -#endif + hc::accelerator acc = stream->getDevice()->_acc; + // create a cooperative accelerated view for launching gws and main kernels + hc::accelerator_view coopAV = acc.create_cooperative_view(); - GET_TLS(); - // launch the init_gws kernel to initialize the GWS + LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); + + // the cooperative queue will wait until this stream completes its operations + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + coopAV.create_blocking_marker(streamCF, hc::accelerator_scope); + } + + streamCrit->_av.acquire_locked_hsa_queue(); + coopAV.acquire_locked_hsa_queue(); + + // launch the init_gws kernel to initialize the GWS in the dedicated cooperative queue result = ihipModuleLaunchKernel(tls, gwsKD, 1, 1, 1, 1, 1, 1, - 0, stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, stream, 
gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr , &coopAV); if (result != hipSuccess) { stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) stream->criticalData()._av.release_locked_hsa_queue(); -#endif - + coopAV.release_locked_hsa_queue(); return hipErrorLaunchFailure; } @@ -498,60 +604,106 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, void* impCoopParams[1]; impCoopParams[0] = &impCoopArg; - // launch the main kernel + // launch the main kernel in the cooperative queue result = ihipModuleLaunchKernel(tls, kd, - gridDim.x * blockDimX.x, - gridDim.y * blockDimX.y, - gridDim.z * blockDimX.z, - blockDimX.x, blockDimX.y, blockDimX.z, + gridDim.x * blockDim.x, + gridDim.y * blockDim.y, + gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, sharedMemBytes, stream, kernelParams, nullptr, nullptr, - nullptr, 0, true, impCoopParams); + nullptr, 0, true, impCoopParams, &coopAV); + + + coopAV.release_locked_hsa_queue(); + stream->criticalData()._av.release_locked_hsa_queue(); + + // this stream will wait until the cooperative queue completes its operations + hc::completion_future cooperativeCF; + if (!coopAV.get_is_empty()) { + cooperativeCF = coopAV.create_marker(hc::accelerator_scope); + streamCrit->_av.create_blocking_marker(cooperativeCF, hc::accelerator_scope); + } stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) - stream->criticalData()._av.release_locked_hsa_queue(); -#endif return result; +#else + return hipErrorInvalidConfiguration; +#endif + } __attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernel(const void* func, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernel, func, gridDim, blockDim, args, + sharedMem, stream); + + return ihipLogStatus(ihipLaunchCooperativeKernel(func, gridDim, 
blockDim, + args, sharedMem, stream, ps)); +} + + hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20093) hipError_t result; if (numDevices > g_deviceCnt || launchParamsList == nullptr || numDevices > MAX_COOPERATIVE_GPUs) { return hipErrorInvalidValue; } + vector streams; + vector deviceIDs; + // check to see if we have valid distinct streams/devices, if cooperative multi device + // launch is supported and if grid/block dimensions are valid for (int i = 0; i < numDevices; ++i) { - if (!launchParamsList[i].stream->getDevice()->_props.cooperativeMultiDeviceLaunch) { + const hipLaunchParams& lp = launchParamsList[i]; + + if (lp.stream == nullptr){ + return hipErrorInvalidResourceHandle; + } + + if (find(streams.begin(), streams.end(), lp.stream) == streams.end()) { + streams.push_back(lp.stream); + } else { + return hipErrorInvalidDevice; + } + + const ihipDevice_t* currentDevice = lp.stream->getDevice(); + if (find(deviceIDs.begin(), deviceIDs.end(), currentDevice->_deviceId) == deviceIDs.end()) { + deviceIDs.push_back(currentDevice->_deviceId); + } else { + return hipErrorInvalidDevice; + } + + if (!currentDevice->_props.cooperativeMultiDeviceLaunch) { + return hipErrorInvalidConfiguration; + } + + if (lp.gridDim.x == 0 || lp.gridDim.y == 0 || lp.gridDim.z == 0 || + lp.blockDim.x == 0 || lp.blockDim.y == 0 || lp.blockDim.z == 0 || + lp.blockDim.x * lp.blockDim.y * lp.blockDim.z > currentDevice->_props.maxThreadsPerBlock){ return hipErrorInvalidConfiguration; } } - hipFunction_t* gwsKds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - - if (kds == nullptr || gwsKds == nullptr) { - return hipErrorNotInitialized; - } + vector gwsKds; + vector kds; + GET_TLS(); // prepare all kernel descriptors for initializing the GWS and the main kernels 
per device for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; - if (lp.stream == nullptr) { - free(gwsKds); - free(kds); - return hipErrorNotInitialized; - } - gwsKds[i] = ps.kernel_descriptor(reinterpret_cast(&init_gws), - hip_impl::target_agent(lp.stream)); + gwsKds.push_back(ps.kernel_descriptor(reinterpret_cast(&init_gws), + hip_impl::target_agent(lp.stream))); if (gwsKds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align gwsKargs = ps.get_kernargs_size_align( @@ -560,23 +712,42 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKargs.getHandle()); - kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), - hip_impl::target_agent(lp.stream)); + kds.push_back(ps.kernel_descriptor(reinterpret_cast(lp.func), + hip_impl::target_agent(lp.stream))); if (kds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); + + uint32_t numBlocksPerSm = 0; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kds[i], + lp.blockDim.x * lp.blockDim.y * lp.blockDim.z, lp.sharedMem); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * lp.stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (lp.gridDim.x * lp.gridDim.y * lp.gridDim.z > maxActiveBlocks) { + return hipErrorCooperativeLaunchTooLarge; + } + } + + vector coopAVs; + + // create cooperative accelerated views for launching gws and main kernels on each device + for (int i = 0; i < numDevices; ++i) { + hc::accelerator acc = launchParamsList[i].stream->getDevice()->_acc; + coopAVs.push_back(acc.create_cooperative_view()); } mg_sync *mg_sync_ptr = 0; - mg_info 
*mg_info_ptr[MAX_COOPERATIVE_GPUs] = {0}; + vector mg_info_ptr; + - GET_TLS(); result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault); if (result != hipSuccess) { return hipErrorInvalidValue; @@ -586,7 +757,8 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL uint all_sum = 0; for (int i = 0; i < numDevices; ++i) { - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_ptr[i], sizeof(mg_info), hipHostMallocDefault); + mg_info *mg_info_temp = nullptr; + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault); if (result != hipSuccess) { hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < i; ++j) { @@ -594,6 +766,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } return hipErrorInvalidValue; } + mg_info_ptr.push_back(mg_info_temp); // calculate the sum of sizes of all grids const hipLaunchParams& lp = launchParamsList[i]; all_sum += lp.blockDim.x * lp.blockDim.y * lp.blockDim.z * @@ -603,9 +776,15 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // lock all streams before launching the blit kernels for initializing the GWS and main kernels to each device for (int i = 0; i < numDevices; ++i) { LockedAccessor_StreamCrit_t streamCrit(launchParamsList[i].stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) + + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + coopAVs[i].create_blocking_marker(streamCF, hc::accelerator_scope); + } + streamCrit->_av.acquire_locked_hsa_queue(); -#endif + coopAVs[i].acquire_locked_hsa_queue(); } // launch the init_gws kernel to initialize the GWS for each device @@ -617,14 +796,13 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKernelParam[0] = &nwm1; result = 
ihipModuleLaunchKernel(tls, gwsKds[i], 1, 1, 1, 1, 1, 1, - 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { @@ -670,14 +848,13 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL lp.blockDim.x, lp.blockDim.y, lp.blockDim.z, lp.sharedMem, lp.stream, lp.args, nullptr, nullptr, nullptr, 0, - true, impCoopParams); + true, impCoopParams, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { @@ -691,14 +868,18 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // unlock all streams for (int i = 0; i < numDevices; ++i) { - launchParamsList[i].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) + coopAVs[i].release_locked_hsa_queue(); launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); -#endif - } - free(gwsKds); - free(kds); + hc::completion_future cooperativeCF; + if (!coopAVs[i].get_is_empty()) { + cooperativeCF = coopAVs[i].create_marker(hc::accelerator_scope); + launchParamsList[i].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, hc::accelerator_scope); + } + + launchParamsList[i].stream->criticalData().unlock(); + } hip_internal::ihipHostFree(tls, mg_sync_ptr); for 
(int j = 0; j < numDevices; ++j) { @@ -706,6 +887,24 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } return result; +#else + return hipErrorInvalidConfiguration; +#endif +} + +__attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, + numDevices, flags); + + return ihipLogStatus(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, + numDevices, + flags, ps)); } namespace hip_impl { @@ -1120,7 +1319,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) { return reinterpret_cast(kd._header); } -hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) { +hipFuncAttributes make_function_attributes(TlsData *tls, ihipModuleSymbol_t& kd) { hipFuncAttributes r{}; hipDeviceProp_t prop{}; @@ -1130,23 +1329,57 @@ hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_ prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; if (kd._is_code_object_v3) { - r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; - r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; - r.numRegs = ((header_v3(kd)->compute_pgm_rsrc1 & 0x3F) + 1) << 2; r.binaryVersion = 0; // FIXME: should it be the ISA version or code // object format version? 
+ r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; + r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; } else { r.localSizeBytes = kd._header->workitem_private_segment_byte_size; r.sharedSizeBytes = kd._header->workgroup_group_segment_byte_size; - r.numRegs = kd._header->workitem_vgpr_count; r.binaryVersion = kd._header->amd_machine_version_major * 10 + kd._header->amd_machine_version_minor; } r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; - r.maxThreadsPerBlock = r.numRegs ? - std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : - prop.maxThreadsPerBlock; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(&kd, &usedVGPRS, &usedSGPRS, &usedLDS); + + r.numRegs = usedVGPRS; + + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerBlock = prop.maxThreadsPerBlock / wavefrontSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + const size_t numSIMD = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / numSIMD; + size_t maxWaves = 0; + for (int i = 0; i < maxWavefrontsPerBlock; i++) { + size_t wavefronts = i + 1; + + if (usedVGPRS > 0) { + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / numSIMD); + size_t vgprs_alu_occupancy = numSIMD * std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS); + + // Calculate blocks occupancy per CU based on VGPR usage + if (vgprs_alu_occupancy < wavefronts) + break; + } + + if (usedSGPRS > 0) { + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = numSIMD * ((usedSGPRS == 0) ? 
maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + if (sgprs_alu_occupancy < wavefronts) + break; + } + maxWaves = wavefronts; + } + + r.maxThreadsPerBlock = maxWaves * wavefrontSize; r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. return r; @@ -1294,29 +1527,6 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const return ihipLogStatus(hipSuccess); } -void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) -{ - if (f->_is_code_object_v3) { - const auto header = reinterpret_cast(f->_header); - // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) - *usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; - // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) - *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; - *usedLDS = header->group_segment_fixed_size; - } - else { - const auto header = f->_header; - // VGPRs granularity is 4 - *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; - // adding 2 to take into account the 2 VCC registers & handle the granularity of 16 - *usedSGPRS = header->wavefront_sgpr_count + 2; - *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; - *usedLDS = header->workgroup_group_segment_byte_size; - } -} - hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) @@ -1439,59 +1649,6 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); } -hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - TlsData *tls, uint32_t* 
numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) -{ - using namespace hip_impl; - - auto ctx = ihipGetTlsDefaultCtx(); - if (ctx == nullptr) { - return hipErrorInvalidDevice; - } - - hipDeviceProp_t prop{}; - ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); - - prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; - - size_t usedVGPRS = 0; - size_t usedSGPRS = 0; - size_t usedLDS = 0; - getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); - - // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 - size_t wavefrontSize = prop.warpSize; - size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); - - const size_t simdPerCU = 4; - const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; - - size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; - - size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); - size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? maxWavesPerSimd - : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); - - // Calculate blocks occupancy per CU based on VGPR usage - *numBlocks = vgprs_alu_occupancy / numWavefronts; - - const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; - size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? maxWavesPerSimd - : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); - - // Calculate blocks occupancy per CU based on SGPR usage - *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); - - size_t total_used_lds = usedLDS + dynSharedMemPerBlk; - if (total_used_lds != 0) { - // Calculate LDS occupacy per CU. 
lds_per_cu / (static_lsd + dynamic_lds) - size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; - *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); - } - - return hipSuccess; -} - hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) { diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 2add6a77c4..63551d1204 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -257,11 +257,39 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - hipError_t e = hipSuccess; - // Create a thread in detached mode to handle callback - ihipStreamCallback_t* cb = new ihipStreamCallback_t(stream, callback, userData); - std::thread(ihipStreamCallbackHandler, cb).detach(); + auto stream_original{stream}; + stream = ihipSyncAndResolveStream(stream); - return ihipLogStatus(e); + if (!stream) return hipErrorInvalidValue; + + LockedAccessor_StreamCrit_t cs{stream->criticalData()}; + + // create first marker + auto cf = cs->_av.create_marker(hc::no_scope); + // get its signal + auto signal = *reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will call the user's callback, then sets first packet's signal to 0 to indicate completion + auto t{new std::function{[=]() { + callback(stream_original, hipSuccess, userData); + hsa_signal_store_relaxed(signal, 0); + }}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, 
void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + + // create additional marker that blocks on the first one + cs->_av.create_blocking_marker(cf, hc::no_scope); + + return ihipLogStatus(hipSuccess); } diff --git a/src/hip_texture.cpp b/src/hip_texture.cpp index 27cf321fbc..29f0465dc1 100644 --- a/src/hip_texture.cpp +++ b/src/hip_texture.cpp @@ -301,7 +301,12 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); - + if(hipResourceTypeLinear == pResDesc->resType) { + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + } else if(!pTexDesc->normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if(hipResourceTypePitch2D != pResDesc->resType) @@ -312,6 +317,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return ihipLogStatus(hipErrorRuntimeOther); } @@ -438,7 +444,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode r imageDescriptor.format.channel_type = channelType; hsa_ext_sampler_descriptor_t samplerDescriptor; - fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + if (normalizedCoords) { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED; + } else { + 
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; @@ -449,6 +461,7 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode r HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); @@ -514,7 +527,9 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if( 0 == pitch) @@ -525,6 +540,7 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); @@ -620,7 +636,9 @@ hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureRea hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT); diff --git a/src/hiprtc.cpp b/src/hiprtc.cpp index e9a516c339..a11207f337 100644 --- a/src/hiprtc.cpp +++ 
b/src/hiprtc.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "../include/hip/hiprtc.h" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/elfio/elfio.hpp" #include "../include/hip/hcc_detail/program_state.hpp" diff --git a/src/program_state.inl b/src/program_state.inl index 548a56795f..aef195804e 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -1,6 +1,6 @@ #include "../include/hip/hcc_detail/program_state.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/hsa_helpers.hpp" #if !defined(__cpp_exceptions) @@ -357,8 +357,11 @@ public: const auto it1 = get_symbol_addresses().find(x); if (it1 == get_symbol_addresses().cend()) { - hip_throw(std::runtime_error{ - "Global symbol: " + x + " is undefined."}); + // For a unknown symbol, initialize it with a magic poison + hsa_executable_agent_global_variable_define( + executable, agent, x.c_str(), + reinterpret_cast(0xDEADBEEFDEADBEEFull)); + continue; } hsa_status_t status; diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu index c6d62c0007..e6a2178053 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu +++ b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu @@ -385,8 +385,7 @@ double compute_BSR(BCRSArrays& bcsr, double *x , double *y){ cudaEventCreate(&startTime); cudaEventCreate(&stopTime); cudaEventRecord(startTime, bcsr.streamId); - // NOTE: cusparseDbsrmv and CUSPARSE_DIRECTION_COLUMN (of type cusparseDirection_t) are yet unsupported by HIP - // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, + // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, HIPSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, cusparseDbsrmv(bcsr.cusparseHandle, 
CUSPARSE_DIRECTION_COLUMN, CUSPARSE_OPERATION_NON_TRANSPOSE, bcsr.nbBlockRow, bcsr.m, bcsr.nbBlocks, &alpha, descr, bcsr.cu_bsrValC, bcsr.cu_bsrRowPtrC, bcsr.cu_bsrColIndC, bcsr.blockSize, diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp new file mode 100644 index 0000000000..10f8c51a6d --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyFromSymbol(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbol(S, NULL, SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp new file mode 100644 index 0000000000..fa341c6cea --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyFromSymbolAsync(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbolAsync(S, NULL, SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbol.cpp b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp new file mode 100644 index 0000000000..8626c2c34f --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyToSymbol(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbol(NULL, S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp new file mode 100644 index 0000000000..832e4336be --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 100 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyToSymbolAsync(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbolAsync(NULL, S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemory.cpp b/tests/src/Negative/memory/hipMemory.cpp new file mode 100644 index 0000000000..a71ee948f5 --- /dev/null +++ b/tests/src/Negative/memory/hipMemory.cpp @@ -0,0 +1,43 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 100 + +int main(){ + hipError_t e; + char str[SIZE]="Hi, I am Ellesemere. What is ur name?"; + + e = hipMemcpy(0, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipMemcpy(NULL, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipMemset(0,99,80); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); +} diff --git a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp new file mode 100644 index 0000000000..6f0662b82d --- /dev/null +++ b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -0,0 +1,40 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#include "test_common.h" + +int main(){ + + hipError_t e; + hipStream_t stream; + + e = hipStreamCreateWithFlags(&stream, -1); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipStreamCreateWithFlags(&stream, 2); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); +} diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 751d44e242..64a9f7fa63 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -96,6 +96,18 @@ void kernel_hisinf(__half* input, int* output) { output[tx] = __hisinf(input[tx]); } +__global__ void testHalfAbs(float* p) { + auto a = __float2half(*p); + a = __habs(a); + *p = __half2float(a); +} + +__global__ void testHalf2Abs(float2* p) { + auto a = __float22half2_rn(*p); + a = __habs2(a); + *p = __half22float2(a); +} + #endif @@ -237,6 +249,31 @@ void checkFunctional() { return; } +void checkHalfAbs() { + { + float *p; + hipMalloc(&p, sizeof(float)); + float pp = -2.1f; + hipMemcpy(p, &pp, sizeof(float), hipMemcpyDefault); + hipLaunchKernelGGL(testHalfAbs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float), hipMemcpyDefault); + hipFree(p); + if(pp < 0.0f) { failed("Half Abs failed"); } + } + { + float2 *p; + hipMalloc(&p, sizeof(float2)); + float2 pp; + pp.x = -2.1f; + pp.y = -1.1f; + hipMemcpy(p, &pp, sizeof(float2), hipMemcpyDefault); + hipLaunchKernelGGL(testHalf2Abs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float2), hipMemcpyDefault); + hipFree(p); + if(pp.x < 0.0f || pp.y < 0.0f) { failed("Half2 Abs Test Failed"); } + } +} + int main() { bool* result{nullptr}; hipMemAllocHost((void**)&result, sizeof(result)); @@ -260,5 +297,7 @@ int main() { // run some functional checks checkFunctional(); + checkHalfAbs(); + passed(); } diff --git a/tests/src/deviceLib/hip_floatnTM.cpp b/tests/src/deviceLib/hip_floatnTM.cpp new file mode 100644 index 
0000000000..921933636f --- /dev/null +++ b/tests/src/deviceLib/hip_floatnTM.cpp @@ -0,0 +1,239 @@ +/* +Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc HIPCC_OPTIONS -std=c++14 + * TEST: %t + * HIT_END + */ + +#include +#include +#include +#include "test_common.h" + +static std::random_device dev; +static std::mt19937 rng(dev()); + +template +__host__ __device__ inline constexpr int count() { + return sizeof(T) / sizeof(M); +} + +inline float getRandomFloat(float min = 10, float max = 100) { + std::uniform_real_distribution gen(min, max); + return gen(rng); +} + +template +void fillMatrix(T* a, int size) { + for (int i = 0; i < size; i++) { + T t; + t.x = getRandomFloat(); + if constexpr (count() >= 2) t.y = getRandomFloat(); + if constexpr (count() >= 3) t.z = getRandomFloat(); + if constexpr (count() >= 4) t.w = getRandomFloat(); + + a[i] = t; + } +} + +// Test operations +template +__host__ __device__ void testOperations(T& a, T& b) { + a.x += b.x; + a.x++; + b.x++; + if constexpr (count() >= 2) { + a.y = b.x; + a.x = b.y; + } + if constexpr (count() >= 3) { + if (a.x > 0) b.x /= a.x; + a.x *= b.z; + a.y--; + } + if constexpr (count() >= 4) { + b.w = a.x; + a.w += (-b.y); + } +} + +template +__global__ void testOperationsGPU(T* d_a, T* d_b, int size) { + int id = threadIdx.x; + if (id > size) return; + T &a = d_a[id]; + T &b = d_b[id]; + + testOperations(a, b); +} + + +template +void dcopy(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + a[i] = b[i]; + } +} + +template +bool isEqual(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + if (a[i] != b[i]) { + return false; + } + } + return true; +} + +// Main function that tests type +// T = what you want to test +// D = pack of 1 i.e. 
float1 int1 +template +void testType(int msize) { + T *fa, *fb, *fc, *h_fa, *h_fb; + fa = new T[msize]; + fb = new T[msize]; + fc = new T[msize]; + h_fa = new T[msize]; + h_fb = new T[msize]; + + T *d_fa, *d_fb; + + constexpr int c = count(); + + if (c <= 0 || c >= 5) { + failed("Invalid Size\n"); + } + + fillMatrix(fa, msize); + dcopy(fb, fa, msize); + dcopy(h_fa, fa, msize); + dcopy(h_fb, fa, msize); + for (int i = 0; i < msize; i++) testOperations(h_fa[i], h_fb[i]); + + hipMalloc(&d_fa, sizeof(T) * msize); + hipMalloc(&d_fb, sizeof(T) * msize); + + hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); + hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice); + + auto kernel = testOperationsGPU; + hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); + + hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost); + + bool pass = true; + if (!isEqual(h_fa, fc, msize)) { + pass = false; + } + + delete[] fa; + delete[] fb; + delete[] fc; + delete[] h_fa; + delete[] h_fb; + hipFree(d_fa); + hipFree(d_fb); + + if (!pass) { + failed("Failed"); + } +} + +int main() { + const int msize = 100; + // double + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // floats + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // chars + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // long + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // longlong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // short + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uchars + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ulong + testType(msize); + testType(msize); + 
testType(msize); + testType(msize); + + // ulonglong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ushort + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + passed(); +} diff --git a/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/tests/src/hiprtc/hiprtcGetLoweredName.cpp index e7b88d26d2..a63e13af64 100644 --- a/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/hiprtc/saxpy.cpp b/tests/src/hiprtc/saxpy.cpp index d063578757..a08c1c2399 100755 --- a/tests/src/hiprtc/saxpy.cpp +++ b/tests/src/hiprtc/saxpy.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp new file mode 100644 index 0000000000..febc664f7d --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: %t + * HIT_END + */ + + +#include "test_common.h" + +int main() { + int* A; + int* Ad; + int* Bd; + + // Allocation + HIPCHECK(hipMalloc((void**)&Ad, sizeof(int))); + HIPCHECK(hipMalloc((void**)&Bd, sizeof(int))); + HIPCHECK(hipHostMalloc((void**)&A,sizeof(int))); + + // Kind should be ignored and test should pass even for incorrect kind + HIPCHECK(hipMemcpy(Ad, A, sizeof(int), hipMemcpyDeviceToHost)); + HIPCHECK(hipMemcpy(A, Ad, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Ad, Bd, sizeof(int), hipMemcpyHostToHost)); + HIPCHECK(hipMemcpy(A, A, sizeof(int), hipMemcpyDeviceToDevice)); + + // nullptr passed as source or destination pointer + HIPASSERT(hipSuccess != hipMemcpy(nullptr, A, sizeof(int), hipMemcpyHostToDevice)); + HIPASSERT(hipSuccess != hipMemcpy(Ad, nullptr, sizeof(int), hipMemcpyHostToDevice)); + + HIPCHECK(hipFree(Ad)); + HIPCHECK(hipFree(Bd)); + HIPCHECK(hipFree(A)); + passed(); +} diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index 4f180829fa..11bd6e7d50 100644 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -107,8 +107,8 @@ void run(const std::vector& buffer) { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); } diff --git a/tests/src/runtimeApi/stream/StreamAddCallback.cpp b/tests/src/runtimeApi/stream/StreamAddCallback.cpp new file mode 100644 index 0000000000..e6492c7ce2 --- /dev/null +++ b/tests/src/runtimeApi/stream/StreamAddCallback.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include "test_common.h" +#include + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +enum class ExecState +{ + EXEC_NOT_STARTED, + EXEC_STARTED, + 
+ EXEC_CB_STARTED, + EXEC_CB_FINISHED, + EXEC_FINISHED +}; + +struct UserData +{ + size_t size; + int* ptr; +}; + +// Global variable to check execution order +std::atomic gData(ExecState::EXEC_NOT_STARTED); + + +void myCallback(hipStream_t stream, hipError_t status, void* user_data) +{ + if(gData.load() != ExecState::EXEC_STARTED) + return; // Error hence return early + + gData.store(ExecState::EXEC_CB_STARTED); + + UserData* data = reinterpret_cast(user_data); + printf("Callback started\n"); + + sleep(1); + + printf("Callback ending.\n"); + gData.store(ExecState::EXEC_CB_FINISHED); +} + +bool test(int count) +{ + printf("\n============ Test iteration %d =============\n",count); + // Stream + hipStream_t stream; + bool result = true; + + gData.store(ExecState::EXEC_STARTED); + + HIPCHECK(hipStreamCreate(&stream)); + + // Array size + size_t size = 10000; + + // Device array + int *data = NULL; + HIPCHECK(hipMalloc((void**)&data, sizeof(int) * size)); + + // Initialize device array to -1 + HIPCHECK(hipMemset(data, -1, sizeof(int) * size)); + + // Host array + int *host = NULL; + HIPCHECK(hipHostMalloc((void**)&host, sizeof(int) * size)); + + // Print host ptr address + printf("In main thread\n"); + + // Initialize user_data for callback + UserData arg; + arg.size = size; + arg.ptr = host; + + // Synchronize device + HIPCHECK(hipDeviceSynchronize()); + + // Asynchronous copy from device to host + HIPCHECK(hipMemcpyAsync(host, data, sizeof(int) * size, hipMemcpyDeviceToHost, stream)); + + // Asynchronous memset on device + HIPCHECK(hipMemsetAsync(data, 0, sizeof(int) * size, stream)); + + // Add callback - should happen after hipMemsetAsync() + HIPCHECK(hipStreamAddCallback(stream, myCallback, &arg, 0)); + + printf("Will wait in main thread until callback completes\n"); + + //This should synchronize the stream (including the callback) + HIPCHECK(hipStreamSynchronize(stream)); + + if(gData.load() != ExecState::EXEC_CB_FINISHED) + { + std::cout<<"Callback is not 
finished\n"; + return false; + } + printf("Callback completed will resume main thread execution\n"); + + if(host[size/2] != -1) + { + // Print some host data that just got copied + printf("Pseudo host data printing (should be -1): %d\n", host[size/2]); + result = false; + } + + HIPCHECK(hipMemcpy(host, data, sizeof(int)*size, hipMemcpyDeviceToHost)); + + if(host[size-1] != 0) + { + printf("Pseudo host data printing (should be 0): %d\n", host[size-1]); + result = false; + } + + HIPCHECK(hipFree(data)); + HIPCHECK(hipHostFree(host)); + HIPCHECK(hipStreamDestroy(stream)); + + gData.store(ExecState::EXEC_FINISHED); + return result; +} + +int main() +{ + // Test involves multithreading hence running multiple times + // to make sure consistency in the behavior + bool status = true; + + for(int i=0; i < 10; i++){ + status = test(i+1); + if(status == false) + { + failed("Test Failed!\n"); + break; + } + } + + if(status == true) passed(); + return 0; +} diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp new file mode 100644 index 0000000000..c22b390ecc --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -0,0 +1,409 @@ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#define WORKAROUND 0 // Enable (1) this to make stream thread-safe by a workaround + +template // = queue blocks, until task is finished in enqueue(queue,task) +class QueueHipRt; + +// Queue types used in the tests +using TestQueues = std::tuple, QueueHipRt>; + + +// --- Implementation + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) +#define HIP_ASSERT_IGNORE(x,ign) auto err=x; HIP_ASSERT(err==ign ? 
hipSuccess : err) + +#ifdef __HIP_PLATFORM_HCC__ + #define HIPRT_CB +#endif + +template +static auto currentThreadWaitFor(QueueHipRt const & queue) -> void; + +template +class QueueHipRt +{ +public: + static constexpr bool isBlocking = IsBlocking; + //----------------------------------------------------------------------------- + QueueHipRt( + int dev) : + m_dev(dev), + m_HipQueue() + { + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamCreateWithFlags( + &m_HipQueue, + hipStreamNonBlocking)); + } + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt const &) = delete; + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt &&) = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt const &) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt &&) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + ~QueueHipRt() + { + if(isBlocking) { +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // we are a non-blocking queue, so we have to wait here with its destruction until all spawned tasks have been processed + currentThreadWaitFor(*this); +#endif + } + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamDestroy( + m_HipQueue)); + } + +public: + int m_dev; //!< The device this queue is bound to. 
+ hipStream_t m_HipQueue; + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + int m_callees = 0; + std::mutex m_mutex; +#endif +}; + +template +struct Enqueue +{ + //############################################################################# + enum class CallbackState + { + enqueued, + notified, + finished, + }; + + //############################################################################# + struct CallbackSynchronizationData : public std::enable_shared_from_this + { + std::mutex m_mutex; + std::condition_variable m_event; + CallbackState state = CallbackState::enqueued; + }; + + //----------------------------------------------------------------------------- + static void HIPRT_CB hipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) + { + // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. + const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); + + // Notify the executing thread. + { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + pCallbackSynchronizationData->state = CallbackState::notified; + } + pCallbackSynchronizationData->m_event.notify_one(); + + // Wait for the executing thread to finish the task if it has not already finished. 
+ std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::finished) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::finished; + } + ); + } + } + + //----------------------------------------------------------------------------- + template + static auto enqueue( + QueueHipRt & queue, + TTask const & task) + -> void + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + { + // thread-safe callee incrementing + std::lock_guard guard(queue.m_mutex); + queue.m_callees += 1; + } +#endif + auto pCallbackSynchronizationData = std::make_shared(); + // test example: https://github.com/ROCm-Developer-Tools/HIP/blob/roc-1.9.x/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp + HIP_ASSERT(hipStreamAddCallback( + queue.m_HipQueue, + hipRtCallback, + pCallbackSynchronizationData.get(), + 0u)); + + // We start a new std::thread which stores the task to be executed. + // This circumvents the limitation that it is not possible to call HIP methods within the HIP callback thread. + // The HIP thread signals the std::thread when it is ready to execute the task. + // The HIP thread is waiting for the std::thread to signal that it is finished executing the task + // before it executes the next task in the queue (HIP stream). + std::thread t( + [pCallbackSynchronizationData, + task +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + ,&queue // requires queue's destructor to wait for all tasks +#endif + ](){ + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // thread-safe task execution and callee decrementing + std::lock_guard guard(queue.m_mutex); +#endif + + // If the callback has not yet been called, we wait for it. 
+ { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::notified) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::notified; + } + ); + } + + task(); + + // Notify the waiting HIP thread. + pCallbackSynchronizationData->state = CallbackState::finished; + } + pCallbackSynchronizationData->m_event.notify_one(); +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + queue.m_callees -= 1; +#endif + } + ); + if(isBlocking) + t.join(); // => waiting for task completion + else + t.detach(); // => do not wait for task completion + } +}; +//############################################################################# +//! The HIP RT non-blocking queue test trait specialization. +struct Empty +{ + //----------------------------------------------------------------------------- + template + static auto empty( + QueueHipRt const & queue) + -> bool + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + return (queue.m_callees==0); +#else + + // Query is allowed even for queues on non current device. + hipError_t ret = hipSuccess; + HIP_ASSERT_IGNORE( + ret = hipStreamQuery( + queue.m_HipQueue), + hipErrorNotReady); + return (ret == hipSuccess); +#endif + } +}; + +template +auto currentThreadWaitFor(QueueHipRt const & queue) -> void +{ +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + while(queue.m_callees>0) { + std::this_thread::sleep_for(std::chrono::milliseconds(10u)); + } +#else + // Sync is allowed even for queues on non current device. 
+ HIP_ASSERT( hipStreamSynchronize( + queue.m_HipQueue)); +#endif +} + + + + +// --- Tests + +#define TEMPLATE_LIST_TEST_CASE(TestName) \ +template static void TestName (std::atomic &check); \ +static int TestName##Runner () { \ + std::atomic check{0}; \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + return check.load(); \ +} \ +template static void TestName (std::atomic &check) + +// add 1 if a check fails +#define CHECK(result) do{int arg=(!(result)); fprintf(stderr, "Checking " #result " %d\n", arg); check.fetch_add(arg);}while(false) + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueIsInitiallyEmpty ) +{ + TestType queue{0}; + CHECK(Empty::empty(queue)); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueCallbackIsWorking ) +{ + std::promise promise; + auto task = [&](){ promise.set_value(true); }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + CHECK(promise.get_future().get()); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueWaitShouldWork ) +{ + bool CallbackFinished = false; + auto task = + [&CallbackFinished]() noexcept + { + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + currentThreadWaitFor(queue); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinished ) +{ + bool CallbackFinished = false; + TestType queue{0}; + auto task = [&queue, 
&CallbackFinished, &check]() noexcept + { + CHECK(!Empty::empty(queue)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + // A non-blocking queue will always stay empty because the task has been executed immediately. + if(!TestType::isBlocking) + { + currentThreadWaitFor(queue); + } + + CHECK(Empty::empty(queue)); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotExecuteTasksInParallel ) +{ + std::atomic taskIsExecuting(false); + std::promise firstTaskFinished; + std::future firstTaskFinishedFuture = firstTaskFinished.get_future(); + std::promise secondTaskFinished; + std::future secondTaskFinishedFuture = secondTaskFinished.get_future(); + + TestType queue{0}; + + std::thread thread1( + [&queue, &taskIsExecuting, &firstTaskFinished, &check]() + { + auto task1 = [&taskIsExecuting, &firstTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + firstTaskFinished.set_value(); + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task1 + ); + }); + + std::thread thread2( + [&queue, &taskIsExecuting, &secondTaskFinished, &check]() + { + auto task2 = [&taskIsExecuting, &secondTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + secondTaskFinished.set_value(); + }; + + Enqueue enqueue; + enqueue.enqueue( + queue, + task2 + ); + }); + + // Both tasks have to be enqueued + thread1.join(); + thread2.join(); + + currentThreadWaitFor(queue); + + firstTaskFinishedFuture.get(); + secondTaskFinishedFuture.get(); +} + +#define TESTER(name) do { \ + int result = name (); \ + fprintf(stderr, #name " %s\n", 
result?"Errors":"No Errors"); \ + if (result) { failed(#name " failed\n"); } \ +} while (false) + +int main() +{ + TESTER(queueIsInitiallyEmptyRunner); + TESTER(queueCallbackIsWorkingRunner); + TESTER(queueWaitShouldWorkRunner); + TESTER(queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinishedRunner); + TESTER(queueShouldNotExecuteTasksInParallelRunner); + passed(); +} diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp new file mode 100644 index 0000000000..381d07280c --- /dev/null +++ b/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -0,0 +1,122 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "../test_common.h" + +#define N 16 +#define offset 3 +__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { + int k = blockIdx.x * blockDim.x + threadIdx.x; + if (k < N) + val[k] = tex1Dfetch(obj, k+offset); +} + +int runTest(hipTextureAddressMode, hipTextureFilterMode); + +int main(int argc, char **argv) { + int testResult = runTest(hipAddressModeClamp,hipFilterModePoint); + testResult = runTest(hipAddressModeClamp,hipFilterModeLinear); + testResult = runTest(hipAddressModeWrap,hipFilterModePoint); + testResult = runTest(hipAddressModeWrap,hipFilterModeLinear); + if(testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +int runTest(hipTextureAddressMode addressMode, hipTextureFilterMode filterMode) { + + int testResult = 1; + + hipCtx_t HipContext; + hipDevice_t HipDevice; + int deviceID = 0; + hipDeviceGet(&HipDevice, deviceID); + hipCtxCreate(&HipContext, 0, HipDevice); + + // Allocating the required buffer on gpu device + float *texBuf, *texBufOut; + float val[N], output[N]; + + for (int i = 0; i < N; i++) { + val[i] = i+1; + output[i] = 0.0; + } + + HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); + HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); + HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); + HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); + hipResourceDesc resDescLinear; + + memset(&resDescLinear, 0, sizeof(resDescLinear)); + resDescLinear.resType = hipResourceTypeLinear; + resDescLinear.res.linear.devPtr = texBuf; + resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + resDescLinear.res.linear.sizeInBytes = N * sizeof(float); + + hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = hipReadModeElementType; + + texDesc.addressMode[0] = addressMode; + 
texDesc.addressMode[1] = addressMode; + texDesc.filterMode = filterMode; + texDesc.normalizedCoords = false; + + // Creating texture object + hipTextureObject_t texObj = 0; + HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); + + dim3 dimBlock(1, 1, 1); + dim3 dimGrid(N , 1, 1); + + hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, + texBufOut, texObj); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); + + for (int i = offset; i < N; i++) { + if (output[i-offset] != val[i]) { + testResult = 0; + break; + } + } + if(testResult){ + for(int i = N-offset; i < N; i++){ + if (output[i] != 0){ + testResult = 0; + break; + } + } + } + HIPCHECK(hipDestroyTextureObject(texObj)); + HIPCHECK(hipFree(texBuf)); + HIPCHECK(hipFree(texBufOut)); + return testResult; +} From 1e4e4cb2296e2dfbd9883c3c1b19def81e109226 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 6 Apr 2020 12:21:48 -0500 Subject: [PATCH 074/132] enabling hipPrintString (to master-next) Change-Id: I28859f3dbe5b867a858ca1d76c93e6fab6a68d1f --- vdi/hip_prof_gen.py | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py index 0195f86f16..5c3f84e536 100755 --- a/vdi/hip_prof_gen.py +++ b/vdi/hip_prof_gen.py @@ -384,6 +384,7 @@ def generate_prof_header(f, api_map, opts_map): f.write('// automatically generated sources\n') f.write('#ifndef _HIP_PROF_STR_H\n'); f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') # Generating dummy macro for non-public API f.write('\n// Dummy API primitives\n') @@ -467,30 +468,29 @@ def generate_prof_header(f, api_map, opts_map): f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') # Generating the method for the API string, name and parameters - if False: - f.write('\n') - f.write('#if 0\n') - f.write('#include \n'); 
- f.write('#include \n'); - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') + f.write('\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); From de67236a27b913ca8c1b4af9fb0f664abab36a28 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Thu, 9 Apr 2020 13:55:44 -0400 Subject: [PATCH 075/132] Disable all texture tests for VDI Latest llvm already includes the texture/surface rework, but appropriate runtime changes have not been submitted. 
Disable all texture related tests until http://gerrit-git.amd.com/c/compute/ec/hip/+/342147 is submitted. Change-Id: I359c2eac6becdd3ca5110f2140679bd29d8ae54b --- tests/src/runtimeApi/module/tex2d_kernel.cpp | 2 +- tests/src/texture/hipBindTex2DPitch.cpp | 2 +- tests/src/texture/hipBindTexRef1DFetch.cpp | 2 +- tests/src/texture/hipNormalizedFloatValueTex.cpp | 2 +- tests/src/texture/hipTextureRef2D.cpp | 2 +- tests/src/texture/simpleTexture2DLayered.cpp | 2 +- tests/src/texture/simpleTexture3D.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp index be121f3e69..e744d88776 100644 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code + * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi * HIT_END */ diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp index b01402c91d..8c57520c00 100644 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ b/tests/src/texture/hipBindTex2DPitch.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /*HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index 52a0d99ac1..2e962fb05d 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. 
/* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp index 96917ecb62..b4aa3e9c05 100644 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index b476ae8062..5573cf6884 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp index e5014dae6b..f4d3aac1e5 100644 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ b/tests/src/texture/simpleTexture2DLayered.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp index 5395fc5d69..a494a1a6c0 100644 --- a/tests/src/texture/simpleTexture3D.cpp +++ b/tests/src/texture/simpleTexture3D.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ From 4d9c540f4fe57551dbfc3cb1b8bf85ee3c19f551 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Thu, 9 Apr 2020 12:44:21 -0700 Subject: [PATCH 076/132] Remove a map lookup whenever we were getting the default stream Change-Id: I64b6d1deea41d81e94a58a83de287e78923656b3 --- vdi/hip_context.cpp | 24 ++++-------------------- vdi/hip_device.cpp | 14 ++++++++++++++ vdi/hip_internal.hpp | 5 +++-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp index 6151f68c6a..440c3f4b47 100644 --- a/vdi/hip_context.cpp +++ b/vdi/hip_context.cpp @@ -34,8 +34,6 @@ thread_local hipError_t g_lastError = hipSuccess; std::once_flag g_ihipInitialized; Device* host_device = nullptr; -std::map g_nullStreams; - void init() { if (!amd::Runtime::initialized()) { amd::IS_HIP = true; @@ -93,24 +91,10 @@ amd::HostQueue* getQueue(hipStream_t stream) { } } -amd::HostQueue* getNullStream(Device& dev) { - auto stream = g_nullStreams.find(&dev); - if (stream == g_nullStreams.end()) { - amd::Device* device = dev.devices()[0]; - cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - amd::HostQueue* queue = new amd::HostQueue(*dev.asContext(), *device, properties, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); - g_nullStreams[&dev] = queue; - return queue; - } - return stream->second; -} - amd::HostQueue* getNullStream(amd::Context& ctx) { - for (auto& it : g_nullStreams) { - if (it.first->asContext() == &ctx) { - return it.second; + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->defaultStream(); } } return nullptr; @@ -118,7 +102,7 @@ amd::HostQueue* getNullStream(amd::Context& ctx) { amd::HostQueue* getNullStream() { Device* device = getCurrentDevice(); - return device ? 
getNullStream(*device) : nullptr; + return device ? device->defaultStream() : nullptr; } }; diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index 30b2292271..cc27fef05e 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -22,6 +22,20 @@ #include "hip_internal.hpp" +namespace hip { + +amd::HostQueue* Device::defaultStream() { + if (defaultStream_ == nullptr) { + const cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + defaultStream_ = new amd::HostQueue(*asContext(), *devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + } + return defaultStream_; +} + +}; + hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(hipDeviceGet, device, deviceId); diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index a2851fd978..ed99d6911e 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -84,6 +84,8 @@ namespace hip { amd::Monitor lock_{"Device lock"}; /// VDI context amd::Context* context_; + /// VDI host queue for default streams + amd::HostQueue* defaultStream_; /// Device's ID /// Store it here so we don't have to loop through the device list every time int deviceId_; @@ -117,6 +119,7 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } + amd::HostQueue* defaultStream(); }; extern std::once_flag g_ihipInitialized; @@ -135,8 +138,6 @@ namespace hip { /// Note: This follows the CUDA spec to sync with default streams /// and Blocking streams extern amd::HostQueue* getQueue(hipStream_t s); - /// Get default stream of the device - extern amd::HostQueue* getNullStream(Device&); /// Get default stream associated with the VDI context extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread From 2613b22e8a1d7461574229a76bff734a756106f0 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 8 Apr 2020 16:00:31 -0400 Subject: [PATCH 077/132] Fix warnings when compiling with clang The following warnings are addressed: 
comparison of different enumeration types in switch statement Change-Id: I6cb3948aeab7287851c57ecc1d4b3a439ab14ec6 --- vdi/hip_conversions.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vdi/hip_conversions.hpp b/vdi/hip_conversions.hpp index d79a39c321..8c0726787f 100644 --- a/vdi/hip_conversions.hpp +++ b/vdi/hip_conversions.hpp @@ -672,7 +672,7 @@ hipResourceDesc getResourceDesc(const HIP_RESOURCE_DESC& resDesc) { hipResourceDesc desc; desc.resType = getResourceType(resDesc.resType); - switch (resDesc.resType) { + switch (desc.resType) { case hipResourceTypeArray: desc.res.array.array = resDesc.res.array.hArray; break; @@ -703,7 +703,7 @@ HIP_RESOURCE_DESC getResourceDesc(const hipResourceDesc& resDesc) { HIP_RESOURCE_DESC desc; desc.resType = getResourceType(resDesc.resType); - switch (resDesc.resType) { + switch (desc.resType) { case HIP_RESOURCE_TYPE_ARRAY: desc.res.array.hArray = resDesc.res.array.array; break; From 7592fd18d7edd884d89e64f661f77d3a2324b136 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 8 Apr 2020 12:56:09 -0400 Subject: [PATCH 078/132] Fix Windows build Change-Id: I8e219f8200875e3c46c1f54348317ba7ad8ae8ba --- vdi/hip_internal.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index ed99d6911e..03dcf0d51d 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -48,7 +48,7 @@ typedef struct ihipIpcMemHandle_st { } ihipIpcMemHandle_t; #ifdef _WIN32 - int getpid() { return _getpid();} + inline int getpid() { return _getpid(); } #endif #define HIP_INIT() \ From a81e7d63c680c65449afcaa9cfeb31cb86bc8d02 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 8 Apr 2020 13:01:15 -0400 Subject: [PATCH 079/132] Correctly check max 1D image buffer size VDI reports the limits in pixels, but user provides the size in bytes. Make sure both values are in pixels before doing comparisons. 
Change-Id: I082c7175c9fa4383e0b0ee38ff8c047c26ff20b4 --- vdi/hip_conversions.hpp | 5 +++++ vdi/hip_texture.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/vdi/hip_conversions.hpp b/vdi/hip_conversions.hpp index 8c0726787f..2a78617ad9 100644 --- a/vdi/hip_conversions.hpp +++ b/vdi/hip_conversions.hpp @@ -895,4 +895,9 @@ HIP_RESOURCE_VIEW_DESC getResourceViewDesc(const hipResourceViewDesc& resViewDes return desc; } + +inline +size_t getElementSize(const hipChannelFormatDesc &desc) { + return (desc.x / 4) * getNumChannels(desc); +} }; diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp index 0f8e818948..b837729721 100644 --- a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -100,7 +100,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, if ((pResDesc->resType == hipResourceTypeLinear) && ((pResDesc->res.linear.devPtr == nullptr) || (!amd::isMultipleOf(pResDesc->res.linear.devPtr, info.imageBaseAddressAlignment_)) || - (pResDesc->res.linear.sizeInBytes >= info.imageMaxBufferSize_))) { + ((pResDesc->res.linear.sizeInBytes / hip::getElementSize(pResDesc->res.linear.desc)) >= info.imageMaxBufferSize_))) { return hipErrorInvalidValue; } From b8b5bdf44bca524277223a40f3d9c66c26bea05d Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 9 Apr 2020 09:59:46 -0500 Subject: [PATCH 080/132] fxing C compatibility (amd-master-next) Change-Id: Ib95b953bb49e0edbe044789b6ff81aaccb87f85f --- vdi/hip_prof_gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py index 5c3f84e536..04f92e0a00 100755 --- a/vdi/hip_prof_gen.py +++ b/vdi/hip_prof_gen.py @@ -427,7 +427,7 @@ def generate_prof_header(f, api_map, opts_map): # Generating the callbacks data structure f.write('\n// HIP API callbacks data structure\n') f.write( - 'struct hip_api_data_t {\n' + + 'typedef struct hip_api_data_s {\n' + ' uint64_t correlation_id;\n' + ' uint32_t phase;\n' + ' union {\n' @@ -443,7 +443,7 @@ def 
generate_prof_header(f, api_map, opts_map): f.write(' } ' + name + ';\n') f.write( ' } args;\n' + - '};\n' + '} hip_api_data_t;\n' ) # Generating the callbacks args data filling macros From a01e2c7d03c6d7eab8133998f6dfc60337b0c8c1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 10 Apr 2020 19:36:43 +0300 Subject: [PATCH 081/132] [HIP][SPARSE] sync with HIPIFY's #95 and #96 Based on: https://github.com/ROCm-Developer-Tools/HIPIFY/pull/95 https://github.com/ROCm-Developer-Tools/HIPIFY/pull/96 Update hipify-perl and CUSPARSE_API_supported_by_HIP.md accordingly --- bin/hipify-perl | 22 +++++++++ .../markdown/CUSPARSE_API_supported_by_HIP.md | 45 +++++++++---------- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 9b170ccb14..baaca2ae71 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -738,8 +738,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcurandSetQuasiRandomGeneratorDimensions\b/hiprandSetQuasiRandomGeneratorDimensions/g; $ft{'library'} += s/\bcurandSetStream\b/hiprandSetStream/g; $ft{'library'} += s/\bcusparseCaxpyi\b/hipsparseCaxpyi/g; + $ft{'library'} += s/\bcusparseCbsrmv\b/hipsparseCbsrmv/g; $ft{'library'} += s/\bcusparseCcsr2csc\b/hipsparseCcsr2csc/g; $ft{'library'} += s/\bcusparseCcsr2hyb\b/hipsparseCcsr2hyb/g; + $ft{'library'} += s/\bcusparseCcsrgeam\b/hipsparseCcsrgeam/g; + $ft{'library'} += s/\bcusparseCcsrgeam2\b/hipsparseCcsrgeam2/g; + $ft{'library'} += s/\bcusparseCcsrgeam2_bufferSizeExt\b/hipsparseCcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseCcsrgemm\b/hipsparseCcsrgemm/g; $ft{'library'} += s/\bcusparseCcsrgemm2\b/hipsparseCcsrgemm2/g; $ft{'library'} += s/\bcusparseCcsrgemm2_bufferSizeExt\b/hipsparseCcsrgemm2_bufferSizeExt/g; @@ -763,6 +767,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCgthrz\b/hipsparseCgthrz/g; $ft{'library'} += s/\bcusparseChybmv\b/hipsparseChybmv/g; $ft{'library'} += s/\bcusparseCnnz\b/hipsparseCnnz/g; + $ft{'library'} += 
s/\bcusparseCnnz_compress\b/hipsparseCnnz_compress/g; $ft{'library'} += s/\bcusparseCreate\b/hipsparseCreate/g; $ft{'library'} += s/\bcusparseCreateCsrgemm2Info\b/hipsparseCreateCsrgemm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrilu02Info\b/hipsparseCreateCsrilu02Info/g; @@ -773,8 +778,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCreateMatDescr\b/hipsparseCreateMatDescr/g; $ft{'library'} += s/\bcusparseCsctr\b/hipsparseCsctr/g; $ft{'library'} += s/\bcusparseDaxpyi\b/hipsparseDaxpyi/g; + $ft{'library'} += s/\bcusparseDbsrmv\b/hipsparseDbsrmv/g; $ft{'library'} += s/\bcusparseDcsr2csc\b/hipsparseDcsr2csc/g; $ft{'library'} += s/\bcusparseDcsr2hyb\b/hipsparseDcsr2hyb/g; + $ft{'library'} += s/\bcusparseDcsrgeam\b/hipsparseDcsrgeam/g; + $ft{'library'} += s/\bcusparseDcsrgeam2\b/hipsparseDcsrgeam2/g; + $ft{'library'} += s/\bcusparseDcsrgeam2_bufferSizeExt\b/hipsparseDcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseDcsrgemm\b/hipsparseDcsrgemm/g; $ft{'library'} += s/\bcusparseDcsrgemm2\b/hipsparseDcsrgemm2/g; $ft{'library'} += s/\bcusparseDcsrgemm2_bufferSizeExt\b/hipsparseDcsrgemm2_bufferSizeExt/g; @@ -804,6 +813,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDgthrz\b/hipsparseDgthrz/g; $ft{'library'} += s/\bcusparseDhybmv\b/hipsparseDhybmv/g; $ft{'library'} += s/\bcusparseDnnz\b/hipsparseDnnz/g; + $ft{'library'} += s/\bcusparseDnnz_compress\b/hipsparseDnnz_compress/g; $ft{'library'} += s/\bcusparseDroti\b/hipsparseDroti/g; $ft{'library'} += s/\bcusparseDsctr\b/hipsparseDsctr/g; $ft{'library'} += s/\bcusparseGetMatDiagType\b/hipsparseGetMatDiagType/g; @@ -814,8 +824,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseGetStream\b/hipsparseGetStream/g; $ft{'library'} += s/\bcusparseGetVersion\b/hipsparseGetVersion/g; $ft{'library'} += s/\bcusparseSaxpyi\b/hipsparseSaxpyi/g; + $ft{'library'} += s/\bcusparseSbsrmv\b/hipsparseSbsrmv/g; $ft{'library'} += s/\bcusparseScsr2csc\b/hipsparseScsr2csc/g; $ft{'library'} += 
s/\bcusparseScsr2hyb\b/hipsparseScsr2hyb/g; + $ft{'library'} += s/\bcusparseScsrgeam\b/hipsparseScsrgeam/g; + $ft{'library'} += s/\bcusparseScsrgeam2\b/hipsparseScsrgeam2/g; + $ft{'library'} += s/\bcusparseScsrgeam2_bufferSizeExt\b/hipsparseScsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseScsrgemm\b/hipsparseScsrgemm/g; $ft{'library'} += s/\bcusparseScsrgemm2\b/hipsparseScsrgemm2/g; $ft{'library'} += s/\bcusparseScsrgemm2_bufferSizeExt\b/hipsparseScsrgemm2_bufferSizeExt/g; @@ -844,6 +858,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseSgthrz\b/hipsparseSgthrz/g; $ft{'library'} += s/\bcusparseShybmv\b/hipsparseShybmv/g; $ft{'library'} += s/\bcusparseSnnz\b/hipsparseSnnz/g; + $ft{'library'} += s/\bcusparseSnnz_compress\b/hipsparseSnnz_compress/g; $ft{'library'} += s/\bcusparseSroti\b/hipsparseSroti/g; $ft{'library'} += s/\bcusparseSsctr\b/hipsparseSsctr/g; $ft{'library'} += s/\bcusparseXbsrilu02_zeroPivot\b/hipsparseXbsrilu02_zeroPivot/g; @@ -854,6 +869,8 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseXcscsort\b/hipsparseXcscsort/g; $ft{'library'} += s/\bcusparseXcscsort_bufferSizeExt\b/hipsparseXcscsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsr2coo\b/hipsparseXcsr2coo/g; + $ft{'library'} += s/\bcusparseXcsrgeam2Nnz\b/hipsparseXcsrgeam2Nnz/g; + $ft{'library'} += s/\bcusparseXcsrgeamNnz\b/hipsparseXcsrgeamNnz/g; $ft{'library'} += s/\bcusparseXcsrgemm2Nnz\b/hipsparseXcsrgemm2Nnz/g; $ft{'library'} += s/\bcusparseXcsrgemmNnz\b/hipsparseXcsrgemmNnz/g; $ft{'library'} += s/\bcusparseXcsrilu02_zeroPivot\b/hipsparseXcsrilu02_zeroPivot/g; @@ -862,8 +879,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseXcsrsort_bufferSizeExt\b/hipsparseXcsrsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsrsv2_zeroPivot\b/hipsparseXcsrsv2_zeroPivot/g; $ft{'library'} += s/\bcusparseZaxpyi\b/hipsparseZaxpyi/g; + $ft{'library'} += s/\bcusparseZbsrmv\b/hipsparseZbsrmv/g; $ft{'library'} += s/\bcusparseZcsr2csc\b/hipsparseZcsr2csc/g; 
$ft{'library'} += s/\bcusparseZcsr2hyb\b/hipsparseZcsr2hyb/g; + $ft{'library'} += s/\bcusparseZcsrgeam\b/hipsparseZcsrgeam/g; + $ft{'library'} += s/\bcusparseZcsrgeam2\b/hipsparseZcsrgeam2/g; + $ft{'library'} += s/\bcusparseZcsrgeam2_bufferSizeExt\b/hipsparseZcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseZcsrgemm\b/hipsparseZcsrgemm/g; $ft{'library'} += s/\bcusparseZcsrgemm2\b/hipsparseZcsrgemm2/g; $ft{'library'} += s/\bcusparseZcsrgemm2_bufferSizeExt\b/hipsparseZcsrgemm2_bufferSizeExt/g; @@ -887,6 +908,7 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZgthrz\b/hipsparseZgthrz/g; $ft{'library'} += s/\bcusparseZhybmv\b/hipsparseZhybmv/g; $ft{'library'} += s/\bcusparseZnnz\b/hipsparseZnnz/g; + $ft{'library'} += s/\bcusparseZnnz_compress\b/hipsparseZnnz_compress/g; $ft{'library'} += s/\bcusparseZsctr\b/hipsparseZsctr/g; $ft{'device_library'} += s/\bcurand\b/hiprand/g; $ft{'device_library'} += s/\bcurand_discrete\b/hiprand_discrete/g; diff --git a/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/docs/markdown/CUSPARSE_API_supported_by_HIP.md index d23b06d307..2c0a1bea9d 100644 --- a/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -203,10 +203,10 @@ | **CUDA** | **HIP** |**CUDA version\***| |-----------------------------------------------------------|-------------------------------------------------|:----------------:| -|`cusparseSbsrmv` | | -|`cusparseDbsrmv` | | -|`cusparseCbsrmv` | | -|`cusparseZbsrmv` | | +|`cusparseSbsrmv` |`hipsparseSbsrmv` | +|`cusparseDbsrmv` |`hipsparseDbsrmv` | +|`cusparseCbsrmv` |`hipsparseCbsrmv` | +|`cusparseZbsrmv` |`hipsparseZbsrmv` | |`cusparseSbsrxmv` | | |`cusparseDbsrxmv` | | |`cusparseCbsrxmv` | | @@ -349,20 +349,20 @@ | **CUDA** | **HIP** |**CUDA version\***| |-----------------------------------------------------------|-------------------------------------------------|:----------------:| -|`cusparseXcsrgeamNnz` | | -|`cusparseScsrgeam` | | 
-|`cusparseDcsrgeam` | | -|`cusparseCcsrgeam` | | -|`cusparseZcsrgeam` | | -|`cusparseXcsrgeam2Nnz` | | 9.2 | -|`cusparseScsrgeam2` | | 9.2 | -|`cusparseDcsrgeam2` | | 9.2 | -|`cusparseCcsrgeam2` | | 9.2 | -|`cusparseZcsrgeam2` | | 9.2 | -|`cusparseScsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseDcsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseCcsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseZcsrgeam2_bufferSizeExt` | | 9.2 | +|`cusparseXcsrgeamNnz` |`hipsparseXcsrgeamNnz` | +|`cusparseScsrgeam` |`hipsparseScsrgeam` | +|`cusparseDcsrgeam` |`hipsparseDcsrgeam` | +|`cusparseCcsrgeam` |`hipsparseCcsrgeam` | +|`cusparseZcsrgeam` |`hipsparseZcsrgeam` | +|`cusparseXcsrgeam2Nnz` |`hipsparseXcsrgeam2Nnz` | 9.2 | +|`cusparseScsrgeam2` |`hipsparseScsrgeam2` | 9.2 | +|`cusparseDcsrgeam2` |`hipsparseDcsrgeam2` | 9.2 | +|`cusparseCcsrgeam2` |`hipsparseCcsrgeam2` | 9.2 | +|`cusparseZcsrgeam2` |`hipsparseZcsrgeam2` | 9.2 | +|`cusparseScsrgeam2_bufferSizeExt` |`hipsparseScsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseDcsrgeam2_bufferSizeExt` |`hipsparseDcsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseCcsrgeam2_bufferSizeExt` |`hipsparseCcsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseZcsrgeam2_bufferSizeExt` |`hipsparseZcsrgeam2_bufferSizeExt` | 9.2 | |`cusparseXcsrgemmNnz` |`hipsparseXcsrgemmNnz` | |`cusparseScsrgemm` |`hipsparseScsrgemm` | |`cusparseDcsrgemm` |`hipsparseDcsrgemm` | @@ -378,7 +378,6 @@ |`cusparseCcsrgemm2_bufferSizeExt` |`hipsparseCcsrgemm2_bufferSizeExt` | |`cusparseZcsrgemm2_bufferSizeExt` |`hipsparseZcsrgemm2_bufferSizeExt` | - ## **7. cuSPARSE Preconditioners Reference** ## ***7.1. 
Incomplete Cholesky Factorization: level 0*** @@ -724,10 +723,10 @@ |`cusparseHpruneCsr2csrNnzByPercentage` | | 9.0 | |`cusparseSpruneCsr2csrNnzByPercentage` | | 9.0 | |`cusparseDpruneCsr2csrNnzByPercentage` | | 9.0 | -|`cusparseSnnz_compress` | | 8.0 | -|`cusparseDnnz_compress` | | 8.0 | -|`cusparseCnnz_compress` | | 8.0 | -|`cusparseZnnz_compress` | | 8.0 | +|`cusparseSnnz_compress` |`hipsparseSnnz_compress` | 8.0 | +|`cusparseDnnz_compress` |`hipsparseDnnz_compress` | 8.0 | +|`cusparseCnnz_compress` |`hipsparseCnnz_compress` | 8.0 | +|`cusparseZnnz_compress` |`hipsparseZnnz_compress` | 8.0 | ## **10. cuSPARSE Generic API Reference** From f311b0062f8e58709b32fee3b5260192ef3fe235 Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Fri, 10 Apr 2020 22:24:33 -0400 Subject: [PATCH 082/132] Fix Windows build Change-Id: I8c46c8ee82a6e47483d4c0430b483eead3772e5b --- include/hip/hcc_detail/hiprtc.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/hip/hcc_detail/hiprtc.h b/include/hip/hcc_detail/hiprtc.h index ec9c85716a..fecea75340 100644 --- a/include/hip/hcc_detail/hiprtc.h +++ b/include/hip/hcc_detail/hiprtc.h @@ -28,7 +28,9 @@ extern "C" { #include +#if !defined(_WIN32) #pragma GCC visibility push (default) +#endif enum hiprtcResult { HIPRTC_SUCCESS = 0, @@ -81,7 +83,9 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#if !defined(_WIN32) #pragma GCC visibility pop +#endif #ifdef __cplusplus } From cf8589b8c8a40ddcc55fa3a51e23390a49824130 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Mon, 13 Apr 2020 04:32:52 -0400 Subject: [PATCH 083/132] [HIP] add support for NoPreSync/NoPostSync flags for Cooperative MultiDevice launch API (#1990) --- src/hip_hcc.cpp | 8 +++++- src/hip_hcc_internal.h | 32 +++++++++++++----------- src/hip_memory.cpp | 6 ++--- src/hip_module.cpp | 57 +++++++++++++++++++++++++++++------------- 4 files changed, 66 insertions(+), 37 deletions(-) 
diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 807dcc7391..2fd40903d7 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -263,7 +263,13 @@ ihipStream_t::ihipStream_t(ihipCtx_t* ctx, hc::accelerator_view av, unsigned int //--- -ihipStream_t::~ihipStream_t() {} +ihipStream_t::~ihipStream_t() { + GET_TLS(); + for (auto mem : coopMemsTracker) { + hip_internal::ihipHostFree(tls, mem->mgs); + hip_internal::ihipHostFree(tls, mem); + } +} hc::hcWaitMode ihipStream_t::waitMode() const { diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 93551c8316..2c3fb25b3a 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -551,6 +551,20 @@ public: typedef ihipStreamCriticalBase_t ihipStreamCritical_t; typedef LockedAccessor LockedAccessor_StreamCrit_t; +// do not change these two structs without changing the device library +struct mg_sync { + uint w0; + uint w1; +}; + +struct mg_info { + struct mg_sync *mgs; + uint grid_id; + uint num_grids; + ulong prev_sum; + ulong all_sum; +}; + //--- // Internal stream structure. 
class ihipStream_t { @@ -619,6 +633,8 @@ class ihipStream_t { // Before calling this function, stream must be resolved from "0" to the actual stream: bool isDefaultStream() const { return _id == 0; }; + std::vector coopMemsTracker; + public: //--- // Public member vars - these are set at initialization and never change: @@ -1018,7 +1034,7 @@ namespace hip_internal { hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream); -hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags); +hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags, bool noSync = 0); hipError_t ihipHostFree(TlsData *tls, void* ptr); @@ -1026,20 +1042,6 @@ hipError_t ihipHostFree(TlsData *tls, void* ptr); #define MAX_COOPERATIVE_GPUs 255 -// do not change these two structs without changing the device library -struct mg_sync { - uint w0; - uint w1; -}; - -struct mg_info { - struct mg_sync *mgs; - uint grid_id; - uint num_grids; - ulong prev_sum; - ulong all_sum; -}; - //--- // TODO - review the context creation strategy here. Really should be: // - first "non-device" runtime call creates the context for this thread. 
Allowed to call diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 832dcc5531..de6bc63b20 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -496,14 +496,14 @@ void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool s return ptr; } -hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) { +hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags, bool noSync) { hipError_t hip_status = hipSuccess; if (sizeBytes == 0) { return hipSuccess; } - if (HIP_SYNC_HOST_ALLOC) { + if (HIP_SYNC_HOST_ALLOC && !noSync) { hipDeviceSynchronize(); } @@ -558,7 +558,7 @@ hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned i } } - if (HIP_SYNC_HOST_ALLOC) { + if (HIP_SYNC_HOST_ALLOC && !noSync) { hipDeviceSynchronize(); } return hip_status; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index d0ec0df9de..e98afa3294 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -511,8 +511,7 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDim.x; size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDim.y; size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDim.z; - if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) - { + if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; } @@ -738,7 +737,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL vector mg_info_ptr; - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault); + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault, true); if (result != hipSuccess) { return hipErrorInvalidValue; } @@ -748,7 +747,7 @@ hipError_t 
ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL uint all_sum = 0; for (int i = 0; i < numDevices; ++i) { mg_info *mg_info_temp = nullptr; - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault); + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault, true); if (result != hipSuccess) { hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < i; ++j) { @@ -770,11 +769,22 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL hc::completion_future streamCF; if (!streamCrit->_av.get_is_empty()) { streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); - coopAVs[i].create_blocking_marker(streamCF, hc::accelerator_scope); + if (flags & hipCooperativeLaunchMultiDeviceNoPreSync) { + coopAVs[i].create_blocking_marker(streamCF, hc::accelerator_scope); + streamCrit->_av.acquire_locked_hsa_queue(); + coopAVs[i].acquire_locked_hsa_queue(); + } else { + for (int j = 0; j < numDevices; ++j) { + coopAVs[j].create_blocking_marker(streamCF, hc::accelerator_scope); + } + } + } + } + if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + launchParamsList[i].stream->criticalData()._av.acquire_locked_hsa_queue(); + coopAVs[i].acquire_locked_hsa_queue(); } - - streamCrit->_av.acquire_locked_hsa_queue(); - coopAVs[i].acquire_locked_hsa_queue(); } // launch the init_gws kernel to initialize the GWS for each device @@ -820,14 +830,14 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL prev_sum += lp.blockDim.x * lp.blockDim.y * lp.blockDim.z * lp.gridDim.x * lp.gridDim.y * lp.gridDim.z; + lp.stream->coopMemsTracker.push_back(mg_info_ptr[i]); impCoopParams[0] = &mg_info_ptr[i]; globalWorkSizeX = (size_t)lp.gridDim.x * (size_t)lp.blockDim.x; globalWorkSizeY = (size_t)lp.gridDim.y * (size_t)lp.blockDim.y; globalWorkSizeZ = 
(size_t)lp.gridDim.z * (size_t)lp.blockDim.z; - if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) - { + if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; } @@ -849,6 +859,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { hip_internal::ihipHostFree(tls, mg_info_ptr[j]); + launchParamsList[j].stream->coopMemsTracker.pop_back(); } return hipErrorLaunchFailure; @@ -856,24 +867,34 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } - // unlock all streams + // unlock streams and create blocking markers on them based on the workload + // on cooperative queues on each device for (int i = 0; i < numDevices; ++i) { coopAVs[i].release_locked_hsa_queue(); launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); + } + for (int i = 0; i < numDevices; ++i) { hc::completion_future cooperativeCF; if (!coopAVs[i].get_is_empty()) { cooperativeCF = coopAVs[i].create_marker(hc::accelerator_scope); - launchParamsList[i].stream->criticalData()._av.create_blocking_marker( - cooperativeCF, hc::accelerator_scope); + if (flags & hipCooperativeLaunchMultiDeviceNoPostSync) { + launchParamsList[i].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, hc::accelerator_scope); + launchParamsList[i].stream->criticalData().unlock(); + } else { + for (int j = 0; j < numDevices; ++j) { + launchParamsList[j].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, hc::accelerator_scope); + } + } } - - launchParamsList[i].stream->criticalData().unlock(); } - hip_internal::ihipHostFree(tls, mg_sync_ptr); - for (int j = 0; j < numDevices; ++j) { - hip_internal::ihipHostFree(tls, mg_info_ptr[j]); + if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { + for (int i = 0; i 
< numDevices; ++i) { + launchParamsList[i].stream->criticalData().unlock(); + } } return result; From 5d8d752da5d309fe4a257d8cfd3001a44cb36a3d Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Fri, 10 Apr 2020 22:49:13 -0400 Subject: [PATCH 084/132] Default stream needs to be initialized Change-Id: I5cbf9076bffb51c0e99601393645f443c1658e13 --- vdi/hip_device.cpp | 4 ++++ vdi/hip_internal.hpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index cc27fef05e..80e247f37c 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -30,6 +30,10 @@ amd::HostQueue* Device::defaultStream() { defaultStream_ = new amd::HostQueue(*asContext(), *devices()[0], properties, amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal); + if ((defaultStream_ == nullptr) || + !defaultStream_->create()) { + return nullptr; + } } return defaultStream_; } diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 03dcf0d51d..9b4bd17042 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -85,7 +85,7 @@ namespace hip { /// VDI context amd::Context* context_; /// VDI host queue for default streams - amd::HostQueue* defaultStream_; + amd::HostQueue* defaultStream_ = nullptr; /// Device's ID /// Store it here so we don't have to loop through the device list every time int deviceId_; From 9ead991784bd4ae793b8823d6b55024bb9737df4 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Mon, 13 Apr 2020 19:45:14 -0400 Subject: [PATCH 085/132] Fix cmake config file (#2010) Removed cmake target files under packaging directory. Merged cmake config .in files for HIP-Clang and HCC as one. Use cmake generated target files in both install and packaging. This makes cmake config file consistent for make install and make package. 
Change-Id: Iae1f8ccb5b39c29f54297edf1c5df62eb5a0253d --- CMakeLists.txt | 21 +--- hip-config-clang.cmake.in | 114 ------------------- hip-config-hcc.cmake.in | 68 ----------- hip-config.cmake.in | 168 ++++++++++++++++++++++++++++ packaging/hip-hcc.txt | 2 +- packaging/hip-targets-release.cmake | 51 --------- packaging/hip-targets.cmake | 139 ----------------------- 7 files changed, 175 insertions(+), 388 deletions(-) delete mode 100644 hip-config-clang.cmake.in delete mode 100644 hip-config-hcc.cmake.in create mode 100644 hip-config.cmake.in delete mode 100644 packaging/hip-targets-release.cmake delete mode 100644 packaging/hip-targets.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cfa56ac9d..4061b163ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -408,21 +408,12 @@ if(HIP_PLATFORM STREQUAL "hcc") install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) include(CMakePackageConfigHelpers) - if(HIP_COMPILER STREQUAL "hcc") - configure_package_config_file( - hip-config-hcc.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - elseif(HIP_COMPILER STREQUAL "clang") - configure_package_config_file( - hip-config-clang.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - endif() + configure_package_config_file( + hip-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR + ) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake diff --git a/hip-config-clang.cmake.in b/hip-config-clang.cmake.in deleted file mode 100644 index 67dcb14695..0000000000 --- a/hip-config-clang.cmake.in +++ /dev/null @@ -1,114 +0,0 @@ -@PACKAGE_INIT@ - 
-include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - ${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") -set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version - OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) - if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") - set(HIP_CLANG_ROOT 
${CMAKE_MATCH_1}) - else() - set(HIP_CLANG_ROOT /opt/rocm/llvm) - endif() -else() - get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) - get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) -endif() -file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) -find_path(HIP_CLANG_INCLUDE_PATH stddef.h - HINTS - ${HIP_CLANG_INCLUDE_SEARCH_PATHS} - NO_DEFAULT_PATH) -find_dependency(amd_comgr) -find_dependency(AMDDeviceLibs) -set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") -set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") - -include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -foreach(GPU_TARGET ${GPU_TARGETS}) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" - ) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" - ) -endforeach() - -set( hip_LIBRARIES hip::host hip::device) -set( hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES ${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config-hcc.cmake.in 
b/hip-config-hcc.cmake.in deleted file mode 100644 index c0ffc6e2af..0000000000 --- a/hip-config-hcc.cmake.in +++ /dev/null @@ -1,68 +0,0 @@ -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - ${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") -set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -find_dependency(hcc) -find_dependency(amd_comgr) -include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set( hip_LIBRARIES hip::host hip::device) -set( 
hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES ${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config.cmake.in b/hip-config.cmake.in new file mode 100644 index 0000000000..ccfbf2b04f --- /dev/null +++ b/hip-config.cmake.in @@ -0,0 +1,168 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) +if (NOT _CMakeFindDependencyMacro_FOUND) + macro(find_dependency dep) + if (NOT ${dep}_FOUND) + set(cmake_fd_version) + if (${ARGC} GREATER 1) + set(cmake_fd_version ${ARGV1}) + endif() + set(cmake_fd_exact_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) + set(cmake_fd_exact_arg EXACT) + endif() + set(cmake_fd_quiet_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + set(cmake_fd_quiet_arg QUIET) + endif() + set(cmake_fd_required_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + set(cmake_fd_required_arg REQUIRED) + endif() + find_package(${dep} ${cmake_fd_version} + ${cmake_fd_exact_arg} + ${cmake_fd_quiet_arg} + ${cmake_fd_required_arg} + ) + string(TOUPPER ${dep} cmake_dep_upper) + if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") + set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) + return() + endif() + set(cmake_fd_version) + set(cmake_fd_required_arg) + set(cmake_fd_quiet_arg) + set(cmake_fd_exact_arg) + endif() + endmacro() +endif() + +set(HIP_COMPILER "@HIP_COMPILER@") +set(HIP_RUNTIME "@HIP_RUNTIME@") + +set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) +set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) +set_and_check( 
hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) +set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) + +set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") +set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") + +if(HIP_COMPILER STREQUAL "clang") + if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) + if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[ \t]*([^\r\n]*)") + get_filename_component(HIP_CLANG_ROOT "${CMAKE_MATCH_1}" PATH) # captured value is the directory holding the compiler; its parent is the root, mirroring the else() branch below + else() + set(HIP_CLANG_ROOT /opt/rocm/llvm) + endif() + else() + get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) + get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) + endif() + file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) + find_path(HIP_CLANG_INCLUDE_PATH stddef.h + HINTS + ${HIP_CLANG_INCLUDE_SEARCH_PATHS} + NO_DEFAULT_PATH) + find_dependency(AMDDeviceLibs) + set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") + set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") +else() + find_dependency(hcc) +endif() + +find_dependency(amd_comgr) + +include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) + +#If HIP is not installed under ROCm, need this to find HSA assuming HSA is under ROCm +if( DEFINED ENV{ROCM_PATH} ) + set(ROCM_PATH "$ENV{ROCM_PATH}") +endif() + +#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip +#and do three level up again +get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) + +#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= +find_path(HSA_HEADER hsa/hsa.h + PATHS + "${_IMPORT_PREFIX}/../include" + /opt/rocm/include +) + +if (NOT HSA_HEADER) # a failed find_path leaves HSA_HEADER set to HSA_HEADER-NOTFOUND, which if() treats as false + message (FATAL_ERROR "HSA header not 
found! ROCM_PATH environment not set") +endif() + +if(HIP_RUNTIME MATCHES "VDI") + set_target_properties(hip::amdhip64 PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +else() + set_target_properties(hip::hip_hcc_static PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}") + + set_target_properties(hip::hip_hcc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +endif() + +if(HIP_COMPILER STREQUAL "clang") + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + ) + + foreach(GPU_TARGET ${GPU_TARGETS}) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" + ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES 
"--cuda-gpu-arch=${GPU_TARGET}" + ) + endforeach() +endif() + +set( hip_LIBRARIES hip::host hip::device) +set( hip_LIBRARY ${hip_LIBRARIES}) + +set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) +set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) +set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) +set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) +set(HIP_LIBRARIES ${hip_LIBRARIES}) +set(HIP_LIBRARY ${hip_LIBRARY}) +set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) +set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) + diff --git a/packaging/hip-hcc.txt b/packaging/hip-hcc.txt index 6a04ebffbd..d084e8d966 100644 --- a/packaging/hip-hcc.txt +++ b/packaging/hip-hcc.txt @@ -12,7 +12,7 @@ if(NOT @HIP_COMPILER@ STREQUAL "clang") endif() install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) +install(FILES @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets.cmake @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets-release.cmake DESTINATION lib/cmake/hip) ############################# # Packaging steps diff --git a/packaging/hip-targets-release.cmake b/packaging/hip-targets-release.cmake deleted file mode 100644 index 3e6307952a..0000000000 --- a/packaging/hip-targets-release.cmake +++ /dev/null @@ -1,51 +0,0 @@ -#---------------------------------------------------------------- -# Generated CMake target import file for configuration "Release". -#---------------------------------------------------------------- - -# Commands may need to know the format version. 
-set(CMAKE_IMPORT_FILE_VERSION 1) - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) -get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -# Import target "hip::hip_hcc_static" for configuration "Release" -set_property(TARGET hip::hip_hcc_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) -if(HIP_COMPILER STREQUAL "hcc" OR HIP_COMPILER STREQUAL "clang") -set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) -else() -set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) -endif() - -list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc_static ) -list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc_static "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" ) - -# Import target "hip::hip_hcc" for configuration "Release" -set_property(TARGET hip::hip_hcc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) -if(HIP_COMPILER STREQUAL "hcc" OR HIP_COMPILER STREQUAL "clang") -set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt;hcc::hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so" - ) -else() -set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so" - ) -endif() - -list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc ) -list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "${_IMPORT_PREFIX}/lib/libhip_hcc.so" ) - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) diff --git a/packaging/hip-targets.cmake b/packaging/hip-targets.cmake deleted file mode 100644 index 32fb7b067a..0000000000 --- a/packaging/hip-targets.cmake +++ /dev/null @@ -1,139 +0,0 @@ -# Generated by CMake 3.5.1 - -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) - message(FATAL_ERROR "CMake >= 2.6.0 required") -endif() -cmake_policy(PUSH) -cmake_policy(VERSION 2.6) -#---------------------------------------------------------------- -# Generated CMake target import file. -#---------------------------------------------------------------- - -# Commands may need to know the format version. -set(CMAKE_IMPORT_FILE_VERSION 1) - -# Protect against multiple inclusion, which would fail when already imported targets are added once more. -set(_targetsDefined) -set(_targetsNotDefined) -set(_expectedTargets) -foreach(_expectedTarget hip::hip_hcc_static hip::hip_hcc hip::host hip::device) - list(APPEND _expectedTargets ${_expectedTarget}) - if(NOT TARGET ${_expectedTarget}) - list(APPEND _targetsNotDefined ${_expectedTarget}) - endif() - if(TARGET ${_expectedTarget}) - list(APPEND _targetsDefined ${_expectedTarget}) - endif() -endforeach() -if("${_targetsDefined}" STREQUAL "${_expectedTargets}") - set(CMAKE_IMPORT_FILE_VERSION) - cmake_policy(POP) - return() -endif() -if(NOT "${_targetsDefined}" STREQUAL "") - message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") -endif() -unset(_targetsDefined) -unset(_targetsNotDefined) -unset(_expectedTargets) - -#If HIP isnot installed under ROCm, need this to find HSA assuming HSA is under ROCm -if( DEFINED ENV{ROCM_PATH} ) - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) 
-get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -# Create imported target hip::hip_hcc_static -add_library(hip::hip_hcc_static STATIC IMPORTED) - -#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= -find_path(HSA_HEADER hsa/hsa.h - PATHS - "${ROCM_PATH}/include" - #Assuming HIP is installed under ROCm - "${_IMPORT_PREFIX}/../include" - /opt/rocm/include -) - -if (HSA_HEADER-NOTFOUND) - message (FATAL_ERROR "HSA header not found! ROCM_PATH environment not set") -endif() - -set_target_properties(hip::hip_hcc_static PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" -) - -# Create imported target hip::hip_hcc -add_library(hip::hip_hcc SHARED IMPORTED) - -set_target_properties(hip::hip_hcc PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" -) - -# Create imported target hip::host -add_library(hip::host INTERFACE IMPORTED) - -set_target_properties(hip::host PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::hip_hcc" -) - -# Create imported target hip::device -add_library(hip::device INTERFACE IMPORTED) - -if(HIP_COMPILER STREQUAL "hcc") -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host;hcc::hccrt;hcc::hc_am" - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" -) -else() -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host" -) -endif() - -if(CMAKE_VERSION VERSION_LESS 3.0.0) - message(FATAL_ERROR "This file relies on consumers using CMake 3.0.0 or greater.") -endif() - -# Load information for each installed configuration. 
-get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -file(GLOB CONFIG_FILES "${_DIR}/hip-targets-*.cmake") -foreach(f ${CONFIG_FILES}) - include(${f}) -endforeach() - -# Cleanup temporary variables. -set(_IMPORT_PREFIX) - -# Loop over all imported files and verify that they actually exist -foreach(target ${_IMPORT_CHECK_TARGETS} ) - foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) - if(NOT EXISTS "${file}" ) - message(FATAL_ERROR "The imported target \"${target}\" references the file - \"${file}\" -but this file does not exist. Possible reasons include: -* The file was deleted, renamed, or moved to another location. -* An install or uninstall procedure did not complete successfully. -* The installation package was faulty and contained - \"${CMAKE_CURRENT_LIST_FILE}\" -but not all the files it references. -") - endif() - endforeach() - unset(_IMPORT_CHECK_FILES_FOR_${target}) -endforeach() -unset(_IMPORT_CHECK_TARGETS) - -# This file does not depend on other imported targets which have -# been exported from the same project but in a separate export set. - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) -cmake_policy(POP) From a87f517873e12d497e3c0d60286957d573397e89 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 13 Apr 2020 19:18:07 -0400 Subject: [PATCH 086/132] SWDEV-227602 - Adding support for hipFuncGetAttribute Change-Id: I16511274653c8c5521447eb2ed0fc5331dae8cba --- vdi/hip_hcc.def.in | 1 + vdi/hip_hcc.map.in | 1 + vdi/hip_module.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) mode change 100644 => 100755 vdi/hip_hcc.def.in mode change 100644 => 100755 vdi/hip_hcc.map.in diff --git a/vdi/hip_hcc.def.in b/vdi/hip_hcc.def.in old mode 100644 new mode 100755 index ef511ee43a..5eaedf6851 --- a/vdi/hip_hcc.def.in +++ b/vdi/hip_hcc.def.in @@ -138,6 +138,7 @@ hipModuleUnload hipOccupancyMaxPotentialBlockSize hipOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +hipFuncGetAttribute hipFuncGetAttributes hipPeekAtLastError hipPointerGetAttributes diff --git a/vdi/hip_hcc.map.in b/vdi/hip_hcc.map.in old mode 100644 new mode 100755 index 2139f45ab8..98a3479f40 --- a/vdi/hip_hcc.map.in +++ b/vdi/hip_hcc.map.in @@ -138,6 +138,7 @@ global: hipOccupancyMaxPotentialBlockSize; hipOccupancyMaxActiveBlocksPerMultiprocessor; hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + hipFuncGetAttribute; hipFuncGetAttributes; hipPeekAtLastError; hipPointerGetAttributes; diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp index 1e5f7d8b31..3d40d8c967 100755 --- a/vdi/hip_module.cpp +++ b/vdi/hip_module.cpp @@ -262,6 +262,69 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h HIP_RETURN(hipSuccess); } +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + HIP_INIT_API(hipFuncGetAttribute, value, attrib, hfunc); + + if ((value == nullptr) || (hfunc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Function* function = hip::Function::asFunction(hfunc); + if (function == 
nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::Kernel* kernel = function->function_; + if (kernel == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const device::Kernel::WorkGroupInfo* wrkGrpInfo + = kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); // looked up for the first entry of the current device's device list + if (wrkGrpInfo == nullptr) { + HIP_RETURN(hipErrorMissingConfiguration); + } + + switch(attrib) { // store the requested attribute in *value; unrecognized attributes are rejected in the default case + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + *value = static_cast<int>(wrkGrpInfo->localMemSize_ + - wrkGrpInfo->privateMemSize_); // NOTE(review): LOCAL_SIZE_BYTES below reports localMemSize_ itself; confirm the meaning of these WorkGroupInfo fields + break; + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + *value = static_cast<int>(wrkGrpInfo->wavefrontPerSIMD_ + * wrkGrpInfo->wavefrontSize_); + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + *value = static_cast<int>(wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + *value = static_cast<int>(wrkGrpInfo->availableGPRs_); + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + *value = 30; // Defaults to 3.0 as HCC + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + *value = static_cast<int>(kernel->signature().version()); + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + *value = static_cast<int>(wrkGrpInfo->availableLDSSize_); + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + *value = 0; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(hipFuncGetAttributes, attr, func); From 88304c15e60481e77f3a280bae1b6a20108e2764 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 9 Apr 2020 16:56:36 -0400 Subject: [PATCH 087/132] Fix MIOpen build failure This is charrypick of 9ead991784bd4ae793b8823d6b55024bb9737df4 and https://github.com/ROCm-Developer-Tools/HIP/pull/2009 Fix cmake config file Removed cmake target files under 
packaging directory. Merged cmake config .in files for HIP-Clang and HCC as one. Use cmake generated target files in both install and packaging. This makes cmake config file consistent for make install and make package. Let device side malloc/free return nullptr and trap Change-Id: I448f3ea2d4934648089bad371debc203f895cba6 --- CMakeLists.txt | 25 ++-- hip-config-clang.cmake.in | 114 ---------------- hip-config-hcc.cmake.in | 68 ---------- hip-config.cmake.in | 168 ++++++++++++++++++++++++ include/hip/hcc_detail/hip_memory.h | 2 +- include/hip/hcc_detail/hip_runtime.h | 11 +- packaging/hip-hcc.txt | 2 +- packaging/hip-targets-release.cmake | 58 -------- packaging/hip-targets.cmake | 158 ---------------------- packaging/hip-vdi.txt | 3 +- tests/src/deviceLib/hipDeviceMalloc.cpp | 4 +- vdi/CMakeLists.txt | 5 +- 12 files changed, 195 insertions(+), 423 deletions(-) delete mode 100644 hip-config-clang.cmake.in delete mode 100644 hip-config-hcc.cmake.in create mode 100644 hip-config.cmake.in delete mode 100644 packaging/hip-targets-release.cmake delete mode 100644 packaging/hip-targets.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c67ed29203..f0213ca9c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,27 +454,20 @@ endif() ############################# if(HIP_PLATFORM STREQUAL "hcc") install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) - install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) + # ToDo: The cmake generated target files are not working. We have to install manually created ones + # for now. Should fix this and remove target files under packaging. 
+ #install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) elseif( HIP_PLATFORM STREQUAL "vdi") # install(TARGETS hip_on_vdi host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) endif() include(CMakePackageConfigHelpers) - if(HIP_COMPILER STREQUAL "hcc") - configure_package_config_file( - hip-config-hcc.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - elseif(HIP_COMPILER STREQUAL "clang") - configure_package_config_file( - hip-config-clang.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - endif() + configure_package_config_file( + hip-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR + ) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake diff --git a/hip-config-clang.cmake.in b/hip-config-clang.cmake.in deleted file mode 100644 index 7344458ffd..0000000000 --- a/hip-config-clang.cmake.in +++ /dev/null @@ -1,114 +0,0 @@ -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - 
${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") -set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version - OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) - if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") - set(HIP_CLANG_ROOT ${CMAKE_MATCH_1}) - else() - set(HIP_CLANG_ROOT /opt/rocm/llvm) - endif() -else() - get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) - get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) -endif() -file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) -find_path(HIP_CLANG_INCLUDE_PATH stddef.h - HINTS - ${HIP_CLANG_INCLUDE_SEARCH_PATHS} - NO_DEFAULT_PATH) -find_dependency(amd_comgr) -find_dependency(AMDDeviceLibs) -set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") -set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") - -include( 
"${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -foreach(GPU_TARGET ${GPU_TARGETS}) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" - ) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" - ) -endforeach() - -set( hip_LIBRARIES hip::host hip::device) -set( hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES ${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config-hcc.cmake.in b/hip-config-hcc.cmake.in deleted file mode 100644 index c0ffc6e2af..0000000000 --- a/hip-config-hcc.cmake.in +++ /dev/null @@ -1,68 +0,0 @@ -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - 
endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - ${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") -set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -find_dependency(hcc) -find_dependency(amd_comgr) -include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set( hip_LIBRARIES hip::host hip::device) -set( hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES ${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config.cmake.in b/hip-config.cmake.in new file mode 100644 index 0000000000..baa7c1607f --- /dev/null +++ b/hip-config.cmake.in @@ -0,0 +1,168 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE 
_CMakeFindDependencyMacro_FOUND) +if (NOT _CMakeFindDependencyMacro_FOUND) + macro(find_dependency dep) + if (NOT ${dep}_FOUND) + set(cmake_fd_version) + if (${ARGC} GREATER 1) + set(cmake_fd_version ${ARGV1}) + endif() + set(cmake_fd_exact_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) + set(cmake_fd_exact_arg EXACT) + endif() + set(cmake_fd_quiet_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + set(cmake_fd_quiet_arg QUIET) + endif() + set(cmake_fd_required_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + set(cmake_fd_required_arg REQUIRED) + endif() + find_package(${dep} ${cmake_fd_version} + ${cmake_fd_exact_arg} + ${cmake_fd_quiet_arg} + ${cmake_fd_required_arg} + ) + string(TOUPPER ${dep} cmake_dep_upper) + if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") + set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) + return() + endif() + set(cmake_fd_version) + set(cmake_fd_required_arg) + set(cmake_fd_quiet_arg) + set(cmake_fd_exact_arg) + endif() + endmacro() +endif() + +set(HIP_COMPILER "@HIP_COMPILER@") +set(HIP_RUNTIME "@HIP_RUNTIME@") + +set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) +set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) +set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) +set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) + +set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") +set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") + +if(HIP_COMPILER STREQUAL "clang") + if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) + if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[ \t]*([^\r\n]*)") + get_filename_component(HIP_CLANG_ROOT "${CMAKE_MATCH_1}" PATH) # captured value is the directory holding the compiler; its parent is the root, mirroring the outer else() branch + else() + 
set(HIP_CLANG_ROOT /opt/rocm/llvm) + endif() + else() + get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) + get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) + endif() + file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) + find_path(HIP_CLANG_INCLUDE_PATH stddef.h + HINTS + ${HIP_CLANG_INCLUDE_SEARCH_PATHS} + NO_DEFAULT_PATH) + find_dependency(AMDDeviceLibs) + set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") + set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") +else() + find_dependency(hcc) +endif() + +find_dependency(amd_comgr) + +include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) + +#If HIP isnot installed under ROCm, need this to find HSA assuming HSA is under ROCm +if( DEFINED ENV{ROCM_PATH} ) + set(ROCM_PATH "$ENV{ROCM_PATH}") +endif() + +#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip +#and do three level up again +get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) + +#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= +find_path(HSA_HEADER hsa/hsa.h + PATHS + "${_IMPORT_PREFIX}/../include" + /opt/rocm/include +) + +if (HSA_HEADER-NOTFOUND) + message (FATAL_ERROR "HSA header not found! 
ROCM_PATH environment not set") +endif() + +if(HIP_RUNTIME MATCHES "VDI") + set_target_properties(hip::amdhip64 PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +else() + set_target_properties(hip::hip_hcc_static PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}") + + set_target_properties(hip::hip_hcc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +endif() + +if(HIP_COMPILER STREQUAL "clang") + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." 
+ ) + + foreach(GPU_TARGET ${GPU_TARGETS}) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" + ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" + ) + endforeach() +endif() + +set( hip_LIBRARIES hip::host hip::device) +set( hip_LIBRARY ${hip_LIBRARIES}) + +set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) +set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) +set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) +set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) +set(HIP_LIBRARIES ${hip_LIBRARIES}) +set(HIP_LIBRARY ${hip_LIBRARY}) +set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) +set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) + diff --git a/include/hip/hcc_detail/hip_memory.h b/include/hip/hcc_detail/hip_memory.h index 866b9e879e..7a6958675b 100644 --- a/include/hip/hcc_detail/hip_memory.h +++ b/include/hip/hcc_detail/hip_memory.h @@ -27,7 +27,7 @@ THE SOFTWARE. // HIP heap is implemented as a global array with fixed size. Users may define // __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. -#if __HCC__ || __HIP__ +#if (__HCC__ || __HIP__) && __HIP_ENABLE_MALLOC__ // Size of page in bytes. #ifndef __HIP_SIZE_OF_PAGE diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index fdb61e70d3..4c1f5e9839 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -44,6 +44,11 @@ THE SOFTWARE. #include #endif //__cplusplus +// __hip_malloc is not working. Disable it by default. 
+#ifndef __HIP_ENABLE_MALLOC__ +#define __HIP_ENABLE_MALLOC__ 0 +#endif + #if __HCC_OR_HIP_CLANG__ #if __HIP__ @@ -308,11 +313,15 @@ static constexpr Coordinates threadIdx{}; #endif // defined __HCC__ #if __HCC_OR_HIP_CLANG__ +#if __HIP_ENABLE_MALLOC__ extern "C" __device__ void* __hip_malloc(size_t); extern "C" __device__ void* __hip_free(void* ptr); - static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } +#else +static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; } +static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; } +#endif #endif //__HCC_OR_HIP_CLANG__ diff --git a/packaging/hip-hcc.txt b/packaging/hip-hcc.txt index 6a04ebffbd..d084e8d966 100644 --- a/packaging/hip-hcc.txt +++ b/packaging/hip-hcc.txt @@ -12,7 +12,7 @@ if(NOT @HIP_COMPILER@ STREQUAL "clang") endif() install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) +install(FILES @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets.cmake @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets-release.cmake DESTINATION lib/cmake/hip) ############################# # Packaging steps diff --git a/packaging/hip-targets-release.cmake b/packaging/hip-targets-release.cmake deleted file mode 100644 index 90fd961184..0000000000 --- a/packaging/hip-targets-release.cmake +++ /dev/null @@ -1,58 +0,0 @@ -#---------------------------------------------------------------- -# Generated CMake target import file for configuration "Release". -#---------------------------------------------------------------- - -# Commands may need to know the format version. 
-set(CMAKE_IMPORT_FILE_VERSION 1) - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) -get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -if(NOT HIP_RUNTIME MATCHES "VDI") -# Import target "hip::hip_hcc_static" for configuration "Release" - set_property(TARGET hip::hip_hcc_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - if(HIP_COMPILER STREQUAL "clang") - set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) - else() - set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) - endif() - list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc_static ) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc_static "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" ) - - # Import target "hip::hip_hcc" for configuration "Release" - set_property(TARGET hip::hip_hcc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - if(HIP_COMPILER STREQUAL "clang") - set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so") - else() - set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt;hcc::hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so") - endif() - list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc ) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "${_IMPORT_PREFIX}/lib/libhip_hcc.so" ) - -else() - - set_property(TARGET hip::amdhip64 APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(hip::amdhip64 PROPERTIES - 
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libamdhip64.so" - IMPORTED_SONAME_RELEASE "libamdhip64.so") - list(APPEND _IMPORT_CHECK_TARGETS hip::amdhip64) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::amdhip64 "${_IMPORT_PREFIX}/lib/libamdhip64.so" ) - -endif() - -# Commands beyond this point should not need to know the version. -set(CMAKE_IMPORT_FILE_VERSION) diff --git a/packaging/hip-targets.cmake b/packaging/hip-targets.cmake deleted file mode 100644 index 6f6957f4d6..0000000000 --- a/packaging/hip-targets.cmake +++ /dev/null @@ -1,158 +0,0 @@ -# Generated by CMake 3.5.1 - -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) - message(FATAL_ERROR "CMake >= 2.6.0 required") -endif() -cmake_policy(PUSH) -cmake_policy(VERSION 2.6) -#---------------------------------------------------------------- -# Generated CMake target import file. -#---------------------------------------------------------------- - -# Commands may need to know the format version. -set(CMAKE_IMPORT_FILE_VERSION 1) - -# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
-set(_targetsDefined) -set(_targetsNotDefined) -set(_expectedTargets) -if(HIP_RUNTIME MATCHES "VDI") - foreach(_expectedTarget hip::amdhip64 hip::host hip::device) -else() - foreach(_expectedTarget hip:hip_hcc_static hip::hip_hcc hip::host hip::device) -endif() - list(APPEND _expectedTargets ${_expectedTarget}) - if(NOT TARGET ${_expectedTarget}) - list(APPEND _targetsNotDefined ${_expectedTarget}) - endif() - if(TARGET ${_expectedTarget}) - list(APPEND _targetsDefined ${_expectedTarget}) - endif() -endforeach() -if("${_targetsDefined}" STREQUAL "${_expectedTargets}") - set(CMAKE_IMPORT_FILE_VERSION) - cmake_policy(POP) - return() -endif() -if(NOT "${_targetsDefined}" STREQUAL "") - message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") -endif() -unset(_targetsDefined) -unset(_targetsNotDefined) -unset(_expectedTargets) - -#If HIP isnot installed under ROCm, need this to find HSA assuming HSA is under ROCm -if( DEFINED ENV{ROCM_PATH} ) - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) -get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -# Create imported target hip::hip_hcc_static -if( NOT HIP_RUNTIME MATCHES "VDI") - add_library(hip::hip_hcc_static STATIC IMPORTED) -endif() - -#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= -find_path(HSA_HEADER hsa/hsa.h - PATHS - "${ROCM_PATH}/include" - #Assuming HIP is installed under ROCm - "${_IMPORT_PREFIX}/../include" - /opt/rocm/include -) - -if (HSA_HEADER-NOTFOUND) - message (FATAL_ERROR "HSA header not found! 
ROCM_PATH environment not set") -endif() -if(HIP_RUNTIME MATCHES "VDI") - # Create imported target hip::amdhip64 - add_library(hip::amdhip64 SHARED IMPORTED) - - set_target_properties(hip::amdhip64 PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - ) -else() - set_target_properties(hip::hip_hcc_static PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}") - - # Create imported target hip::hip_hcc - add_library(hip::hip_hcc SHARED IMPORTED) - - set_target_properties(hip::hip_hcc PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - ) -endif() - -# Create imported target hip::host -add_library(hip::host INTERFACE IMPORTED) - -if(HIP_RUNTIME MATCHES "VDI") - set_target_properties(hip::host PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::amdhip64") -else() - set_target_properties(hip::host PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::hip_hcc") -endif() - - -# Create imported target hip::device -add_library(hip::device INTERFACE IMPORTED) - -if(HIP_COMPILER STREQUAL "hcc") -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host;hcc::hccrt;hcc::hc_am" - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" -) -else() -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host" -) -endif() - -if(CMAKE_VERSION VERSION_LESS 3.0.0) - message(FATAL_ERROR "This file relies on consumers using CMake 3.0.0 or greater.") -endif() - -# Load information for each installed configuration. 
-get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -file(GLOB CONFIG_FILES "${_DIR}/hip-targets-*.cmake") -foreach(f ${CONFIG_FILES}) - include(${f}) -endforeach() - -# Cleanup temporary variables. -set(_IMPORT_PREFIX) - -# Loop over all imported files and verify that they actually exist -foreach(target ${_IMPORT_CHECK_TARGETS} ) - foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) - if(NOT EXISTS "${file}" ) - message(FATAL_ERROR "The imported target \"${target}\" references the file - \"${file}\" -but this file does not exist. Possible reasons include: -* The file was deleted, renamed, or moved to another location. -* An install or uninstall procedure did not complete successfully. -* The installation package was faulty and contained - \"${CMAKE_CURRENT_LIST_FILE}\" -but not all the files it references. -") - endif() - endforeach() - unset(_IMPORT_CHECK_FILES_FOR_${target}) -endforeach() -unset(_IMPORT_CHECK_TARGETS) - -# This file does not depend on other imported targets which have -# been exported from the same project but in a separate export set. - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) -cmake_policy(POP) diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index eefdcf69fb..20aa356383 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -7,7 +7,8 @@ install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) +file(GLOB target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets-*.cmake) +install(FILES @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets.cmake ${target_files} DESTINATION lib/cmake/hip) ############################# # Packaging steps diff --git a/tests/src/deviceLib/hipDeviceMalloc.cpp b/tests/src/deviceLib/hipDeviceMalloc.cpp index 4af7614a5c..aec891ed19 100644 --- a/tests/src/deviceLib/hipDeviceMalloc.cpp +++ b/tests/src/deviceLib/hipDeviceMalloc.cpp @@ -17,8 +17,8 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s NVCC_OPTIONS -std=c++11 - * TEST: %t EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM all + * TEST: %t EXCLUDE_HIP_PLATFORM all * HIT_END */ #include "test_common.h" diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index ee772874af..8c1ca1f2de 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -158,12 +158,11 @@ add_library(amdhip64_static STATIC add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) -target_link_libraries(host INTERFACE amdhip64_static) add_library(device INTERFACE) target_link_libraries(device INTERFACE host) -target_link_libraries(amdhip64_static amdvdi_static pthread dl) -target_link_libraries(amdhip64 amdvdi_static pthread dl) +target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) From 8d83e954572e756fbb9ed34467de31a207666407 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 14 Apr 2020 06:37:14 -0400 Subject: [PATCH 088/132] Disable device side malloc (#2009) * Disable device side malloc Currently device side malloc is not working and takes excessive device memory. Disable it for now until a working malloc is implemented. Change-Id: I1ad908c1c53a83752383b4be96688a848642c699 --- include/hip/hcc_detail/hip_memory.h | 2 +- include/hip/hcc_detail/hip_runtime.h | 11 ++++++++++- src/hip_device.cpp | 5 +++-- src/hip_memory.cpp | 3 +++ tests/src/deviceLib/hipDeviceMalloc.cpp | 4 ++-- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/hip/hcc_detail/hip_memory.h b/include/hip/hcc_detail/hip_memory.h index 866b9e879e..0c006143de 100644 --- a/include/hip/hcc_detail/hip_memory.h +++ b/include/hip/hcc_detail/hip_memory.h @@ -27,7 +27,7 @@ THE SOFTWARE. // HIP heap is implemented as a global array with fixed size. 
Users may define // __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. -#if __HCC__ || __HIP__ +#if (__HCC__ || __HIP__) && __HIP_ENABLE_DEVICE_MALLOC__ // Size of page in bytes. #ifndef __HIP_SIZE_OF_PAGE diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 0707cc6899..582e0cdefa 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -44,6 +44,11 @@ THE SOFTWARE. #include #endif //__cplusplus +// __hip_malloc is not working. Disable it by default. +#ifndef __HIP_ENABLE_DEVICE_MALLOC__ +#define __HIP_ENABLE_DEVICE_MALLOC__ 0 +#endif + #if __HCC_OR_HIP_CLANG__ #if __HIP__ @@ -305,11 +310,15 @@ static constexpr Coordinates threadIdx{}; #endif // defined __HCC__ #if __HCC_OR_HIP_CLANG__ +#if __HIP_ENABLE_DEVICE_MALLOC__ extern "C" __device__ void* __hip_malloc(size_t); extern "C" __device__ void* __hip_free(void* ptr); - static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } +#else +static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; } +static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; } +#endif #endif //__HCC_OR_HIP_CLANG__ diff --git a/src/hip_device.cpp b/src/hip_device.cpp index e5797727ae..f7d6b3ac79 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -96,12 +96,13 @@ hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { if (pValue == nullptr) { return ihipLogStatus(hipErrorInvalidValue); } +#if __HIP_ENABLE_DEVICE_MALLOC__ if (limit == hipLimitMallocHeapSize) { *pValue = (size_t)__HIP_SIZE_OF_HEAP; return ihipLogStatus(hipSuccess); - } else { - return ihipLogStatus(hipErrorUnsupportedLimit); } +#endif + return ihipLogStatus(hipErrorUnsupportedLimit); } hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp 
index de6bc63b20..82ecaea82a 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" @@ -30,8 +31,10 @@ THE SOFTWARE. #include #include +#if __HIP_ENABLE_DEVICE_MALLOC__ __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; __device__ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; +#endif // Internal HIP APIS: namespace hip_internal { diff --git a/tests/src/deviceLib/hipDeviceMalloc.cpp b/tests/src/deviceLib/hipDeviceMalloc.cpp index 4af7614a5c..aec891ed19 100644 --- a/tests/src/deviceLib/hipDeviceMalloc.cpp +++ b/tests/src/deviceLib/hipDeviceMalloc.cpp @@ -17,8 +17,8 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s NVCC_OPTIONS -std=c++11 - * TEST: %t EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM all + * TEST: %t EXCLUDE_HIP_PLATFORM all * HIT_END */ #include "test_common.h" From 3128b2dfd1e77382dcad812e198540303cf68069 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 14 Apr 2020 09:47:59 -0400 Subject: [PATCH 089/132] Fix build failure of rocPRIM Two issues are fixed: libamdhip64_static.a is not included in package. cmake generated target files use installation path of libraries which are created when the libraries are built and installed. The CI uses a customized installation directory which is not the package installation directory, therefore the library location in cmake generated target files differs from the library location installed from package. This causes rocPRIM build failure since rocPRIM uses pkg-config which checks library location. The fix is to fix the library location before adding cmake generated target files to package. 
Change-Id: I4aa2c6138f58df6d4a86301a5c0436edcb19ab70 --- packaging/hip-vdi.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index 20aa356383..2bbe4331d0 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -2,19 +2,29 @@ cmake_minimum_required(VERSION 2.8.3) project(hip_vdi) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -file(GLOB target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets-*.cmake) -install(FILES @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets.cmake ${target_files} DESTINATION lib/cmake/hip) ############################# # Packaging steps ############################# set(CPACK_SET_DESTDIR TRUE) set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") + +## cmake generated target files contains IMPORTED_LOCATION_RELEASE etc. which +## is installation path when building the project, which may be different from +## the intallation path for packaging. These paths have to be replaced by +## the package installation path, otherwise apps using pkg-config will fail. 
+file(GLOB _target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets*.cmake) +foreach(_target_file ${_target_files}) + execute_process(COMMAND sed -i s:@CMAKE_INSTALL_PREFIX@:${CPACK_INSTALL_PREFIX}:g ${_target_file}) +endforeach() +install(FILES ${_target_files} DESTINATION lib/cmake/hip) + set(CPACK_PACKAGE_NAME "hip-vdi") set(HCC_PACKAGE_NAME "vdi") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]") From f95cdb09c273c2a352efe3219bc2f8bcd305931a Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Mon, 13 Apr 2020 15:07:40 -0400 Subject: [PATCH 090/132] Correctly calculate size of the copy region Since we adjust the start of the region, amd::BufferRect::end_ is no longer the size, just the offset as to where the region ends. The actual size of the region is (amd::BufferRect::end_ - amd::BufferRect::start_). Change-Id: I8425d8bdfb20f485740863813e762e8923d9ee94 --- vdi/hip_memory.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 1625f7703a..eb56b69bd2 100644 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -1028,8 +1028,8 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, srcRect.end_ += srcOffset; amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcEnd)) { + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { return hipErrorInvalidValue; } @@ -1041,8 +1041,8 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, dstRect.end_ += dstOffset; amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstEnd)) { + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { return hipErrorInvalidValue; } @@ -1092,8 +1092,8 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, 
srcRect.end_ += srcOffset; amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcEnd)) { + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { return hipErrorInvalidValue; } @@ -1152,8 +1152,8 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, dstRect.end_ += dstOffset; amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstEnd)) { + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { return hipErrorInvalidValue; } From 4c2ab3f41e8f5de16e16ee42c708a26cb166da3d Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Mon, 6 Apr 2020 09:58:35 -0400 Subject: [PATCH 091/132] Solve issues with hip-vdi runtime static lib 1.Combine libamdhip64_static_base.a and libamdvdi_static.a into libamdhip64_static.a. 2.Let hipcc use -use-staticlib to link libamdhip64_static.a. 3.Add some samples for static lib. 4.Fix compiling failure of code object. 
Change-Id: Ia2333622a8d05639b90974c4c5d3d85654ba0138 --- bin/hipcc | 14 ++++++++++---- samples/0_Intro/bit_extract/Makefile | 7 ++++++- samples/0_Intro/square/Makefile | 7 +++++-- vdi/CMakeLists.txt | 23 ++++++++++++++++++----- vdi/hip_internal.hpp | 6 ++++++ vdi/hip_platform.cpp | 2 +- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 5ed781bc60..25b9078cd6 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -211,10 +211,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; - if (not $isWindows) { - $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; - $HIPLDFLAGS .= " -lhip_hcc"; - } else { + if ($isWindows) { $HIPLDFLAGS .= " -lamdhip64"; } if ($HIP_CLANG_HCC_COMPAT_MODE) { @@ -480,6 +477,7 @@ foreach $arg (@ARGV) { $linkType = 0; $setLinkType = 1; + $swallowArg = 1; } if(($trimarg eq '-use-sharedlib') and ($setLinkType eq 0)) { @@ -770,6 +768,14 @@ if ($HIP_PLATFORM eq "clang") { if (not $isWindows) { $HIPLDFLAGS .= " -lgcc_s -lgcc -lpthread -lm"; } + + if (not $isWindows and not $compileOnly) { + if ($linkType eq 0) { + $toolArgs .= " -L$HIP_LIB_PATH -lamdhip64_static -L$ROCM_PATH/lib -lhsa-runtime64 -ldl "; + } else { + $toolArgs .= " -Wl,--enable-new-dtags -Wl,--rpath=$HIP_LIB_PATH:$ROCM_PATH/lib -lhip_hcc "; + } + } } diff --git a/samples/0_Intro/bit_extract/Makefile b/samples/0_Intro/bit_extract/Makefile index 08bca6e642..4a3a0bb4fe 100644 --- a/samples/0_Intro/bit_extract/Makefile +++ b/samples/0_Intro/bit_extract/Makefile @@ -13,10 +13,15 @@ ifeq (${HIP_PLATFORM}, nvcc) endif EXE=bit_extract +EXE_STATIC=bit_extract_static $(EXE): bit_extract.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ +$(EXE_STATIC): bit_extract.cpp + $(HIPCC) -use-staticlib $(HIPCC_FLAGS) $< -o $@ + +all: $(EXE) $(EXE_STATIC) clean: - rm -f *.o $(EXE) + rm -f *.o $(EXE) $(EXE_STATIC) diff --git a/samples/0_Intro/square/Makefile 
b/samples/0_Intro/square/Makefile index 9bb0dd8205..aa046eeaaa 100644 --- a/samples/0_Intro/square/Makefile +++ b/samples/0_Intro/square/Makefile @@ -11,7 +11,7 @@ else SOURCES=square.cpp endif -all: square.out +all: square.out square.out.static # Step square.cpp: square.cu @@ -20,5 +20,8 @@ square.cpp: square.cu square.out: $(SOURCES) $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ +square.out.static: $(SOURCES) + $(HIPCC) -use-staticlib $(CXXFLAGS) $(SOURCES) -o $@ + clean: - rm -f *.o *.out square.cpp + rm -f *.o *.out *.out.static square.cpp diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index 8c1ca1f2de..aa82dec373 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -152,21 +152,33 @@ add_library(amdhip64 SHARED $ ) -add_library(amdhip64_static STATIC +add_library(amdhip64_static_base STATIC $ ) add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) +target_link_libraries(host INTERFACE amdhip64_static_base) add_library(device INTERFACE) target_link_libraries(device INTERFACE host) -target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64_static_base PRIVATE amdvdi_static pthread dl) target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) +set(STATICLIBNAME "${hip_BINARY_DIR}/lib/libamdhip64_static.a") + +add_custom_command( + OUTPUT ${STATICLIBNAME} + COMMAND rm -f ${STATICLIBNAME} + COMMAND ${CMAKE_AR} -rcsT ${STATICLIBNAME} $ $ + DEPENDS amdhip64_static_base amdvdi_static + COMMENT "Combining static libs into ${STATICLIBNAME} " +) + +add_custom_target(amdhip64_static ALL + DEPENDS ${STATICLIBNAME} +) -INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) -INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhip_hcc.so )" DESTINATION lib COMPONENT MAIN) INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhiprtc.so )" DESTINATION lib COMPONENT 
MAIN) @@ -174,6 +186,7 @@ INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MA INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) -INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(PROGRAMS ${STATICLIBNAME} DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(TARGETS amdhip64_static_base amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 9b4bd17042..10819350f5 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -132,6 +132,7 @@ namespace hip { extern void init(); extern Device* getCurrentDevice(); + extern void setCurrentDevice(unsigned int index); /// Get VDI queue associated with hipStream @@ -255,6 +256,11 @@ private: ~PlatformState() {} public: static PlatformState& instance() { + if (platform_ == nullptr) { + // __hipRegisterFatBinary() will call this when app starts, thus + // there is no multiple entry issue here. 
+ platform_ = new PlatformState(); + } return *platform_; } diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 5ece473e06..52ad36c29d 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -30,7 +30,7 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; -PlatformState* PlatformState::platform_ = new PlatformState(); +PlatformState* PlatformState::platform_ = nullptr; struct __CudaFatBinaryWrapper { unsigned int magic; From 0ccb7f08a58208342cbfb062a1cf706ecb9adb1a Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Fri, 17 Apr 2020 01:01:06 -0400 Subject: [PATCH 092/132] moving the extractkernel tool from hcc to hip (#1644) --- bin/extractkernel | 249 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100755 bin/extractkernel diff --git a/bin/extractkernel b/bin/extractkernel new file mode 100755 index 0000000000..871610be06 --- /dev/null +++ b/bin/extractkernel @@ -0,0 +1,249 @@ +#!/usr/bin/perl +use strict; +use File::Copy; +use File::Spec; +use File::Basename; +use File::Which; +use Cwd 'realpath'; +use Getopt::Std; +use List::Util qw(max); + +sub usage { + print("Usage: $0 [OPTION]... 
-i \n"); + print("Extract the device kernels from an hcc executable.\n\n"); + print("-h \t\t\t\tshow this help message\n"); + print("-i \t\t\t\tinput file\n"); + exit; +} + +my $debug = 0; + +# use clang offload bundler (instead of "dd") +# to extract device object from the bundle +my $use_clang_offload_bundler = 1; + +my %options=(); +getopts('hi:', \%options); + +if (!%options || defined $options{h}) { + usage(); +} + +my $input_file; +defined $options{i} || die("input not specified"); +$input_file = $options{i}; +(-f $input_file) || die("can't find $input_file"); + +# look for llvm-objdump and clang-offload-bundler +my $tools_path_prefix; +my $llvm_objdump; +my $clang_offload_bundler; + +if (defined $ENV{'HCC_HOME'}) { + $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin"); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); +} +else { + $tools_path_prefix = dirname(realpath($0)); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); + if (!(-f $llvm_objdump)) { + $tools_path_prefix = realpath($tools_path_prefix."/../../hcc/bin"); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); + } +} + +if (!(-f $llvm_objdump)) { + $llvm_objdump = which("llvm-objdump"); + if (!(-f $llvm_objdump)) { + die("Can't find llvm-objdump\n"); + } +} + +if (!(-f $clang_offload_bundler)) { + $clang_offload_bundler = which("clang-offload-bundler"); + if (!(-f $clang_offload_bundler)) { + die("Can't find clang-offload-bundler\n"); + } +} + +# kernel section information for HCC +my $kernel_section_name = ".kernel"; +my $kernel_triple = "hcc-amdgcn-amd-amdhsa--"; +my $kernel_blob_alignment = 1; + +my $kernel_section_size = hex(`objdump 
-h $input_file | grep $kernel_section_name | awk '{print \$3}'`); +if (!$kernel_section_size) { + + # If there isn't a section created by HCC, + # try to detect a kernel section created by HIP-Clang + $kernel_section_name = ".hip_fatbin"; + $kernel_triple = "hip-amdgcn-amd-amdhsa-"; + $kernel_blob_alignment = 8; + + $kernel_section_size = hex(`objdump -h $input_file | grep $kernel_section_name | awk '{print \$3}'`); + $kernel_section_size or die("No kernel section found\n"); +} + +my $kernel_section_offset = hex(`objdump -h $input_file | grep $kernel_section_name | awk '{print \$6}'`); +my $kernel_section_end = $kernel_section_offset + $kernel_section_size; +if ($debug) { + print "kernel section size: $kernel_section_size\n"; + print "kernel section offset: $kernel_section_offset\n"; + print "kernel section end: $kernel_section_end\n"; +} + +# parse kernel bundle header (explicit read mode; "or" so that die applies to open itself, not to the filename) +open(INPUT_FP, '<', $input_file) or die $!; +binmode INPUT_FP; + +my $current_blob_offset = $kernel_section_offset; +my $num_blobs = 0; +#while ($current_blob_offset < $kernel_section_end) { +while(1) { + + # adjust the offset to the blob alignment + $current_blob_offset = int(($current_blob_offset + ($kernel_blob_alignment - 1)) / $kernel_blob_alignment) * $kernel_blob_alignment; + if ($debug) { + print "Current blob offset: $current_blob_offset\n"; + } + + if ($current_blob_offset >= $kernel_section_end) { + if ($debug) { + print "reached end of kernel section\n"; + } + last; + } + + seek(INPUT_FP, $current_blob_offset, 0); + + # skip OFFLOAD_BUNDLER_MAGIC_STR + my $magic_str; + my $read_bytes = read(INPUT_FP, $magic_str, 24); + if (($read_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) { + # didn't detect the bundle magic string + if ($debug) { + print "Offload bundle magic string not detected\n"; + } + last; + } + # read number of bundles + my $num_bundles; + $read_bytes = read(INPUT_FP, $num_bundles, 8); + $read_bytes == 8 or die("Fail to parse number of bundles\n"); + $num_bundles = 
unpack("Q", $num_bundles); + if ($debug) { + print "Blob $num_blobs, number of bundles: $num_bundles\n"; + } + + # detected GPU targets + my @asic_target_array; + + my $last_bundle_offset = 0; + my $last_bundle_size = 0; + + # strings for creating new files + my $file_blob_number = sprintf("%03d", $num_blobs); + my $filename_prefix = "${input_file}-${file_blob_number}"; + + my $clang_offloadbundler_outputs="-outputs=/dev/null"; + my $clang_offloadbundler_targets="-targets=host-x86_64-unknown-linux"; + + for (my $iter = 0; $iter < $num_bundles; $iter++) { + # read bundle offset + my $offset; + $read_bytes = read(INPUT_FP, $offset, 8); + $read_bytes == 8 or die("Fail to parse bundle offset\n"); + $offset = unpack("Q", $offset); + $last_bundle_offset = max($last_bundle_offset, $offset); + + # read bundle size + my $size; + $read_bytes = read(INPUT_FP, $size, 8); + $read_bytes == 8 or die("Fail to parse bundle size\n"); + $size = unpack("Q", $size); + if ($last_bundle_offset == $offset) { + $last_bundle_size = $size; + } + + # read triple size + my $triple_size; + $read_bytes = read(INPUT_FP, $triple_size, 8); + $read_bytes == 8 or die("Fail to parse triple size\n"); + $triple_size = unpack("Q", $triple_size); + + # triple + my $triple; + $read_bytes = read(INPUT_FP, $triple, $triple_size); + $read_bytes == $triple_size or die("Fail to parse triple\n"); + + if ($debug) { + print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n"); + } + + # Only process GPU targets, skip host targets + my $triple_pattern = "^$kernel_triple"; + if ($triple =~ /$triple_pattern/) { + my $asic_target = substr($triple, length($kernel_triple)); + + # augment arguments for clang-offload-bundler + my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco"; + $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}"; + $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}"; + + # add into 
asic_target_array + $asic_target_array[$#asic_target_array + 1]=$asic_target; + + if (!$use_clang_offload_bundler) { + my $offset_for_hsaco = $current_blob_offset + $offset; + my $dd_command ="dd if=${input_file} of=${hsaco_file_name} skip=$offset_for_hsaco count=$size bs=1 status=none"; + if ($debug) { + print("extract code bundle with dd: $dd_command\n"); + } + system($dd_command) == 0 + or die("Fail to extract code bundle with dd\n"); + } + + } else { + #print("Host target: " . $Triple . "\n"); + } + } + + # extract the code blob + my $blob_filename = "${filename_prefix}.bundle"; + my $write_bytes = $last_bundle_offset + $last_bundle_size; + system("dd if=$input_file of=$blob_filename skip=$current_blob_offset count=$write_bytes bs=1 status=none") == 0 + or die("Extracting kernel bundle file failed: $?"); + + if ($use_clang_offload_bundler) { + # use clang-offload-bundler to unbundle HSACO + my $command = "${clang_offload_bundler} -unbundle -type=o -inputs=${blob_filename} ${clang_offloadbundler_outputs} ${clang_offloadbundler_targets}"; + if ($debug) { + print("clang offload bundler command: $command\n"); + } + system($command) == 0 + or die("Fail to execute clang-offload-bundler"); + } + + for (my $iter = 0; $iter <= $#asic_target_array; $iter++) { + my $asic_target = $asic_target_array[$iter]; + my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco"; + my $isa_file_name = "${filename_prefix}-${asic_target}.isa"; + + # use llvm-objdump to dump out GCN ISA + system("$llvm_objdump -disassemble -mcpu=$asic_target $hsaco_file_name > $isa_file_name") == 0 or die("Fail to disassemble AMDGPU ISA for target" . $asic_target); + + if ($debug) { + print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . 
"\n"); + } + } + + $current_blob_offset = $current_blob_offset + $last_bundle_offset + $last_bundle_size; + $num_blobs++; +} + +$num_blobs or die("No device code found.\n"); +exit(0); + From ef596cd088b82331a314350f04e64ed3f97eb4b1 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 16 Apr 2020 22:01:22 -0700 Subject: [PATCH 093/132] add IPC event support (#1996) --- include/hip/hcc_detail/hip_runtime_api.h | 17 +- src/hip_event.cpp | 310 ++++++++++++++++++--- src/hip_hcc.cpp | 45 ++- src/hip_hcc_internal.h | 38 ++- src/hip_stream.cpp | 31 ++- src/trace_helper.h | 5 + tests/src/runtimeApi/event/hipEventIpc.cpp | 112 ++++++++ 7 files changed, 486 insertions(+), 72 deletions(-) create mode 100644 tests/src/runtimeApi/event/hipEventIpc.cpp diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 12fd9b7a91..b0d1c3570d 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -97,8 +97,6 @@ typedef int hipDevice_t; typedef struct ihipStream_t* hipStream_t; -// TODO: IPC implementation - #define hipIpcMemLazyEnablePeerAccess 0 #define HIP_IPC_HANDLE_SIZE 64 @@ -107,12 +105,9 @@ typedef struct hipIpcMemHandle_st { char reserved[HIP_IPC_HANDLE_SIZE]; } hipIpcMemHandle_t; -// TODO: IPC event handle currently unsupported -struct ihipIpcEventHandle_t; -typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; - - -// END TODO +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; typedef struct ihipModule_t* hipModule_t; @@ -3154,10 +3149,8 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned hipError_t hipIpcCloseMemHandle(void* devPtr); -// hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); -// hipError_t hipIpcCloseMemHandle(void *devPtr); -// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); -// hipError_t hipIpcOpenMemHandle(void** devPtr, 
hipIpcMemHandle_t handle, unsigned int flags); +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); /** diff --git a/src/hip_event.cpp b/src/hip_event.cpp index b297fabbd9..c626f7956d 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -24,6 +24,74 @@ THE SOFTWARE. #include "hip_hcc_internal.h" #include "trace_helper.h" +#include // errno, ENOENT +#include // O_RDWR, O_CREATE +#include // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED +#include // ftruncate, close + +namespace { + + inline + const char* hsa_to_string(hsa_status_t err) noexcept + { + const char* r{}; + + if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r; + + return "Unknown."; + } + + template + inline + void throwing_result_check(hsa_status_t res, const char (&file)[m], + const char (&function)[n], int line) { + if (res == HSA_STATUS_SUCCESS) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + hsa_to_string(res)}; + } + + template + inline + void throwing_retval_check(int good, int retval, const char (&file)[m], + const char (&function)[n], int line) { + if (retval == good) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + strerror(retval)}; + } + + template + inline + void throwing_msg_check(bool bad, const char (&msg)[o], + const char (&file)[m], + const char (&function)[n], int line) { + if (!bad) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + msg}; + } + + template + inline + void throwing_errno_check(bool bad, const char (&file)[m], + const char (&function)[n], int line) { + if 
(!bad) return; + + throw std::runtime_error{"Failed in file " + (file + + (", in function \"" + (function + + ("\", on line " + std::to_string(line))))) + + ", with error: " + strerror(errno)}; + } + +} // Unnamed namespace. + //------------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------------- // Events @@ -49,6 +117,43 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipS } +static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) { + if (!ecd._ipc_name.empty()) return; + + // create random shmem name + char name_template[] = "/tmp/eventXXXXXX"; + int temp_fd = mkstemp(name_template); + throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__); + + // copy shmem name into event data, reformat to use a single slash + ecd._ipc_name = name_template; + ecd._ipc_name.replace(0, 5, "/hip_"); + + // open shmem + ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777); + throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__); + + // size it (ftruncate reports failure as -1 with the cause in errno) + throwing_errno_check(-1 == ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__); + + // mmap it (failure is reported as MAP_FAILED, never NULL) + ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0); + throwing_errno_check(MAP_FAILED == ecd._ipc_shmem, __FILE__, __func__, __LINE__); + + // initialize shared state + ecd._ipc_shmem->owners = 1; + ecd._ipc_shmem->read_index = -1; + ecd._ipc_shmem->write_index = 0; + for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) { + ecd._ipc_shmem->signal[i] = 0; + } + + // remove temp file + throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__); + throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__); +} + + static std::pair refreshEventStatus(ihipEventData_t &ecd) { if (ecd._state == hipEventStatusRecording && 
ecd.marker().is_ready()) { if ((ecd._type == hipEventTypeIndependent) || @@ -75,9 +180,9 @@ static std::pair refreshEventStatus(ihipEventData_t hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) { hipError_t e = hipSuccess; - // TODO-IPC - support hipEventInterprocess. unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | - hipEventReleaseToDevice | hipEventReleaseToSystem; + hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventInterprocess; const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); const bool illegalFlags = @@ -105,29 +210,66 @@ hipError_t hipEventCreate(hipEvent_t* event) { return ihipLogStatus(ihipEventCreate(event, 0)); } - hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream); if (!event) return ihipLogStatus(hipErrorInvalidHandle); stream = ihipSyncAndResolveStream(stream); LockedAccessor_EventCrit_t eCrit(event->criticalData()); - if (eCrit->_eventData._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); + auto &ecd{eCrit->_eventData}; + if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) { // TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0 // If default stream , then wait on all queues. ihipCtx_t* ctx = ihipGetTlsDefaultCtx(); ctx->locked_syncDefaultStream(true, true); - eCrit->_eventData.marker(hc::completion_future()); // reset event - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = hc::get_system_ticks(); - eCrit->_eventData._state = hipEventStatusComplete; + ecd.marker(hc::completion_future()); // reset event + ecd._stream = stream; + ecd._timestamp = hc::get_system_ticks(); + ecd._state = hipEventStatusComplete; + // TODO handle IPC case? 
} else { // Record the event in the stream: - eCrit->_eventData.marker(stream->locked_recordEvent(event)); - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = 0; - eCrit->_eventData._state = hipEventStatusRecording; + ecd.marker(stream->locked_recordEvent(event)); + ecd._stream = stream; + ecd._timestamp = 0; + ecd._state = hipEventStatusRecording; + if (event->_flags & hipEventInterprocess) { + createIpcEventShmemIfNeeded(ecd); + int write_index = ecd._ipc_shmem->write_index++; // fetch add + int offset = write_index % IPC_SIGNALS_PER_EVENT; + // While event still valid and still locked, spin. + while (ecd._ipc_shmem->signal[offset] != 0) { + // TODO backoff + } + // Lock signal. + ecd._ipc_shmem->signal[offset] = 1; + // forward signal state from local signal to IPC signal via host callback + // create callback that can be passed to hsa_amd_signal_async_handler + // this function decrements the IPC signal by 1 to indicate completion + std::atomic *signal = &ecd._ipc_shmem->signal[offset]; + auto t{new std::function{[=]() { + signal->store(0); + }}}; + // register above callback with HSA runtime to be called when local signal + // is decremented from 1 to 0 by CP + auto local_signal = *reinterpret_cast(eCrit->_eventData.marker().get_native_handle()); + hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1, + [](hsa_signal_value_t x, void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + // Update read index to indicate new signal. 
+ int expected = write_index-1; + while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) { + throwing_msg_check( + expected >= write_index, + "IPC event record update read index failure", + __FILE__, __func__, __LINE__); + expected = write_index-1; + } + } } return ihipLogStatus(hipSuccess); } @@ -137,8 +279,18 @@ hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(hipEventDestroy, event); if (event) { + { + LockedAccessor_EventCrit_t crit(event->criticalData()); + auto &ecd{crit->_eventData}; + if (ecd._ipc_shmem) { + int owners = --ecd._ipc_shmem->owners; + throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__); + throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__); + if (0 == owners) + throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__); + } + } delete event; - return ihipLogStatus(hipSuccess); } else { return ihipLogStatus(hipErrorInvalidHandle); @@ -148,31 +300,44 @@ hipError_t hipEventDestroy(hipEvent_t event) { hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event); - if (event){ - if (!(event->_flags & hipEventReleaseToSystem)) { - tprintf(DB_WARN, - "hipEventSynchronize on event without system-scope fence ; consider creating with " - "hipEventReleaseToSystem\n"); - } - auto ecd = event->locked_copyCrit(); + if (!event) return ihipLogStatus(hipErrorInvalidHandle); - if (ecd._state == hipEventStatusUnitialized) { - return ihipLogStatus(hipErrorInvalidHandle); - } else if (ecd._state == hipEventStatusCreated) { - // Created but not actually recorded on any device: - return ihipLogStatus(hipSuccess); - } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) { - auto* ctx = ihipGetTlsDefaultCtx(); - // TODO-HIP_SYNC_NULL_STREAM - can remove this code - ctx->locked_syncDefaultStream(true, true); - return ihipLogStatus(hipSuccess); - } else { 
- ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked - : hc::hcWaitModeActive); - return ihipLogStatus(hipSuccess); + if (!(event->_flags & hipEventReleaseToSystem)) { + tprintf(DB_WARN, + "hipEventSynchronize on event without system-scope fence ; consider creating with " + "hipEventReleaseToSystem\n"); + } + + auto ecd = event->locked_copyCrit(); + + if ((event->_flags & hipEventInterprocess) && ecd._ipc_shmem) { + // this is an IPC event with shared state mapped; without it, fall through to the local path + int previous_read_index = ecd._ipc_shmem->read_index; + if (previous_read_index >= 0) { + // we have at least one recorded event, so proceed + int offset = previous_read_index % IPC_SIGNALS_PER_EVENT; + // While event still valid and still locked, spin. + while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) { + // TODO backoff + } } - } else { + return ihipLogStatus(hipSuccess); + } + + if (ecd._state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidHandle); + } else if (ecd._state == hipEventStatusCreated) { + // Created but not actually recorded on any device: + return ihipLogStatus(hipSuccess); + } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) { + auto* ctx = ihipGetTlsDefaultCtx(); + // TODO-HIP_SYNC_NULL_STREAM - can remove this code + ctx->locked_syncDefaultStream(true, true); + return ihipLogStatus(hipSuccess); + } else { + ecd.marker().wait((event->_flags & hipEventBlockingSync) ? 
hc::hcWaitModeBlocked + : hc::hcWaitModeActive); + return ihipLogStatus(hipSuccess); } } @@ -239,9 +404,80 @@ hipError_t hipEventQuery(hipEvent_t event) { auto ecd = event->locked_copyCrit(); - if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) { - return ihipLogStatus(hipErrorNotReady); + // this event is either from an ipc handle, or the owner of a local ipc event + if (event->_flags & hipEventInterprocess) { + if (ecd._ipc_shmem) { + int previous_read_index = ecd._ipc_shmem->read_index; + int offset = previous_read_index % IPC_SIGNALS_PER_EVENT; + if (previous_read_index >= 0 && ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) { + return ihipLogStatus(hipErrorNotReady); + } + else { + return ihipLogStatus(hipSuccess); + } + } + } + // normal event + else { + if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) { + return ihipLogStatus(hipErrorNotReady); + } } return ihipLogStatus(hipSuccess); } + +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) +{ + HIP_INIT_API(hipIpcGetEventHandle, handle, event); + +#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2 + if (!handle) return ihipLogStatus(hipErrorInvalidHandle); + if (!event) return ihipLogStatus(hipErrorInvalidHandle); + if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle); + if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle); + + LockedAccessor_EventCrit_t crit(event->criticalData()); + + auto &ecd{crit->_eventData}; + createIpcEventShmemIfNeeded(ecd); + // copy name into handle + ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle; + memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE); + ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos); + + return ihipLogStatus(hipSuccess); +#else + return ihipLogStatus(hipErrorNotSupported); +#endif +} + +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) +{ + 
HIP_INIT_API(hipIpcOpenEventHandle, event, &handle); + +#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2 + if (!event) return ihipLogStatus(hipErrorInvalidHandle); + + // create a new event with timing disabled, per spec + auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess); + if (hip_status != hipSuccess) return ihipLogStatus(hip_status); + + LockedAccessor_EventCrit_t crit((*event)->criticalData()); + auto &ecd{crit->_eventData}; + ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle; + ecd._ipc_name = iHandle->shmem_name; + // open shmem + ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777); + throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__); + // mmap it (failure is reported as MAP_FAILED, never NULL) + ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0); + throwing_errno_check(MAP_FAILED == ecd._ipc_shmem, __FILE__, __func__, __LINE__); + // update shared state + ecd._ipc_shmem->owners += 1; + + return ihipLogStatus(hipSuccess); +#else + return ihipLogStatus(hipErrorNotSupported); +#endif +} diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 2fd40903d7..5159254d57 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -332,12 +332,55 @@ void ihipStream_t::locked_wait() { locked_wait(waited); }; +typedef struct { + int previous_read_index; + ihipIpcEventShmem_t *shmem; + hsa_signal_t signal; +} callback_data_t; + +static void WaitThenDecrementSignal(callback_data_t *data) { + int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT; + // While event valid and locked, spin. + while (data->shmem->read_index < data->previous_read_index+IPC_SIGNALS_PER_EVENT && data->shmem->signal[offset] != 0) { + } + hsa_signal_store_relaxed(data->signal, 0); + delete data; +} + // Causes current stream to wait for specified event to complete: // Note this does not provide any kind of host serialization. 
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t& ecd) { LockedAccessor_StreamCrit_t crit(_criticalData); - crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + // if event is an IPC event, it doesn't have a marker + // we use a host callback to block stream with a signal wait + if (ecd._ipc_shmem) { + // create first marker + auto cf = crit->_av.create_marker(hc::no_scope); + // get its signal + auto signal = *reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will host wait on IPC signal, then sets first packet's signal to 0 to indicate completion + auto t{new callback_data_t{ecd._ipc_shmem->read_index, ecd._ipc_shmem, signal}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + // the HSA async handler is single threaded, so we can't block, therefore use a detached thread + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, void* p) { + std::thread(WaitThenDecrementSignal, static_cast(p)).detach(); + return false; + }, t); + + // create additional marker that blocks on the first one + crit->_av.create_blocking_marker(cf, hc::accelerator_scope); + } + else { + crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + } } diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 2c3fb25b3a..b1777955aa 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -389,18 +389,28 @@ const hipStream_t hipStreamNull = 0x0; /** - * HIP IPC Handle Size + * HIP IPC Mem Handle Size */ -#define HIP_IPC_RESERVED_SIZE 24 +#define HIP_IPC_MEM_RESERVED_SIZE 24 class ihipIpcMemHandle_t { public: #if USE_IPC hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr #endif size_t psize; - char reserved[HIP_IPC_RESERVED_SIZE]; 
+ char reserved[HIP_IPC_MEM_RESERVED_SIZE]; }; +/** + * HIP IPC Event Handle Size + */ +#define HIP_IPC_EVENT_RESERVED_SIZE 32 +class ihipIpcEventHandle_t { + public: +#if USE_IPC + char shmem_name[HIP_IPC_HANDLE_SIZE]; +#endif +}; struct ihipModule_t { std::string fileName; @@ -686,6 +696,14 @@ enum ihipEventType_t { hipEventTypeStopCommand, }; +#define IPC_SIGNALS_PER_EVENT 32 +typedef struct ihipIpcEventShmem_s { + std::atomic owners; + std::atomic read_index; + std::atomic write_index; + std::atomic signal[IPC_SIGNALS_PER_EVENT]; +} ihipIpcEventShmem_t; + struct ihipEventData_t { ihipEventData_t() { @@ -693,18 +711,24 @@ struct ihipEventData_t { _stream = NULL; _timestamp = 0; _type = hipEventTypeIndependent; + _ipc_name = ""; + _ipc_fd = 0; + _ipc_shmem = NULL; }; - void marker(const hc::completion_future& marker) { _marker = marker; }; + void marker(const hc::completion_future& marker) { _marker = marker; } hc::completion_future& marker() { return _marker; } - uint64_t timestamp() const { return _timestamp; }; - ihipEventType_t type() const { return _type; }; + uint64_t timestamp() const { return _timestamp; } + ihipEventType_t type() const { return _type; } ihipEventType_t _type; hipEventStatus_t _state; hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual // stream when recorded uint64_t _timestamp; // store timestamp, may be set on host or by marker. 
+ std::string _ipc_name; + int _ipc_fd; + ihipIpcEventShmem_t *_ipc_shmem; private: hc::completion_future _marker; }; @@ -716,7 +740,7 @@ template class ihipEventCriticalBase_t : LockedBase { public: explicit ihipEventCriticalBase_t(const ihipEvent_t* parentEvent) : _parent(parentEvent) {} - ~ihipEventCriticalBase_t(){}; + ~ihipEventCriticalBase_t() {} // Keep data in structure so it can be easily copied into snapshots // (used to reduce lock contention and preserve correct lock order) diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 63551d1204..5b56b71cd8 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -63,11 +63,11 @@ hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flag // TODO - se try-catch loop to detect memory exception? // - // Note this is an execute_any_order queue, + // Note this is an execute_any_order queue, // CUDA stream behavior is that all kernels submitted will automatically - // wait for prev to complete, this behaviour will be mainatined by - // hipModuleLaunchKernel. execute_any_order will help - // hipExtModuleLaunchKernel , which uses a special flag + // wait for prev to complete, this behaviour will be mainatined by + // hipModuleLaunchKernel. 
execute_any_order will help + // hipExtModuleLaunchKernel , which uses a special flag { // Obtain mutex access to the device critical data, release by destructor @@ -130,18 +130,19 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags); - hipError_t e = hipSuccess; + if (!event) return ihipLogStatus(hipErrorInvalidHandle); - if (event == nullptr) { - e = hipErrorInvalidHandle; - - } else { - auto ecd = event->locked_copyCrit(); + auto ecd = event->locked_copyCrit(); + if (event->_flags & hipEventInterprocess) { + // this is an IPC event; shared state exists only once it was recorded, exported or opened + if (ecd._ipc_shmem && ecd._ipc_shmem->read_index >= 0) { + // we have at least one recorded event, so proceed + stream->locked_streamWaitEvent(ecd); + } + } + else { if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { - // conservative wait on host for the specified event to complete: - // return _stream->locked_eventWaitComplete(this, waitMode); - // ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { @@ -150,9 +151,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int stream->locked_streamWaitEvent(ecd); } } - } // else event not recorded, return immediately and don't create marker. + } - return ihipLogStatus(e); + return ihipLogStatus(hipSuccess); }; diff --git a/src/trace_helper.h b/src/trace_helper.h index 84f218a438..202a302f70 100644 --- a/src/trace_helper.h +++ b/src/trace_helper.h @@ -71,6 +71,11 @@ inline std::string ToString(hipEvent_t v) { ss << v; return ss.str(); }; +// hipIpcEventHandle_t specialization. 
TODO +template <> +inline std::string ToString(hipIpcEventHandle_t v) { + return std::string{}; +}; // hipStream_t template <> inline std::string ToString(hipStream_t v) { diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp new file mode 100644 index 0000000000..b62e0a16aa --- /dev/null +++ b/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test hipEventRecord serialization behavior. 
+// Through manual inspection of the reported timestamps, can determine if recording a NULL event +// forces synchronization : set + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t --iterations 10 + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + if (blocks > 1024) blocks = 1024; + if (blocks == 0) blocks = 1; + + printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, + ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); + printf("iterations=%d\n", iterations); + + size_t Nbytes = N * sizeof(float); + + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + hipEvent_t start, stop; + + // NULL stream check: + HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); + HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); + + + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + + for (int i = 0; i < iterations; i++) { + //--- START TIMED REGION + long long hostStart = HipTest::get_time(); + // Record the start event + HIPCHECK(hipEventRecord(start, NULL)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A_d), static_cast(B_d), C_d, N); + + + HIPCHECK(hipEventRecord(stop, NULL)); + HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventQuery(stop)); + long long hostStop = HipTest::get_time(); + //--- STOP TIMED REGION + + + float eventMs = 1.0f; + // should fail + HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); + float hostMs = HipTest::elapsed_time(hostStart, hostStop); + + printf("host_time (gettimeofday) 
=%6.3fms\n", hostMs); + printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + printf("\n"); + + } + + hipIpcEventHandle_t ipc_handle; + HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); + + hipEvent_t ipc_event; + HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); + + HIPCHECK(hipEventSynchronize(ipc_event)); + + HIPCHECK(hipEventDestroy(ipc_event)); + HIPCHECK(hipEventDestroy(start)); + HIPCHECK(hipEventDestroy(stop)); + + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + + printf("check:\n"); + + HipTest::checkVectorADD(A_h, B_h, C_h, N, true); + + + passed(); +} From e0364d23ff399931a96b9d978bfca789d8d129e0 Mon Sep 17 00:00:00 2001 From: Sarbojit2019 <52527887+SarbojitAMD@users.noreply.github.com> Date: Fri, 17 Apr 2020 10:31:47 +0530 Subject: [PATCH 094/132] [HIPTEST]common changes for unittest (#2017) --- CMakeLists.txt | 6 +- tests/unit/test_common.cpp | 180 ++++++++++++++ tests/unit/test_common.h | 474 +++++++++++++++++++++++++++++++++++++ 3 files changed, 659 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_common.cpp create mode 100644 tests/unit/test_common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4061b163ff..b56c47af30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -552,8 +552,12 @@ if(${RUN_HIT} EQUAL 0) include_directories(${HIP_SRC_PATH}/tests/src) hit_add_directory_recursive(${HIP_SRC_PATH}/tests/src "directed_tests") + # Add unit tests + include_directories(${HIP_SRC_PATH}/tests/unit) + hit_add_directory_recursive(${HIP_SRC_PATH}/tests/unit "unit_tests") + # Add top-level tests to build_tests - add_custom_target(build_tests DEPENDS directed_tests) + add_custom_target(build_tests DEPENDS directed_tests unit_tests) # Add custom target: check add_custom_target(check COMMAND "${CMAKE_COMMAND}" --build . 
--target test DEPENDS build_tests) diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp new file mode 100644 index 0000000000..1c0dcc8c34 --- /dev/null +++ b/tests/unit/test_common.cpp @@ -0,0 +1,180 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "test_common.h" + +// standard global variables that can be set on command line +size_t N = 4 * 1024 * 1024; +char memsetval = 0x42; +int memsetD32val = 0xDEADBEEF; +short memsetD16val = 0xDEAD; +char memsetD8val = 0xDE; +int iterations = 1; +unsigned blocksPerCU = 6; // to hide latency +unsigned threadsPerBlock = 256; +int p_gpuDevice = 0; +unsigned p_verbose = 0; +int p_tests = -1; /*which tests to run. Interpretation is left to each test. 
default:all*/ +#ifdef _WIN64 +const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES="; +const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES="; +const char* PATH_SEPERATOR_STR = "\\"; +const char* NULL_DEVICE = "NUL:"; +#else +const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES"; +const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES"; +const char* PATH_SEPERATOR_STR = "/"; +const char* NULL_DEVICE = "/dev/null"; +#endif + +namespace HipTest { + + +double elapsed_time(long long startTimeUs, long long stopTimeUs) { + return ((double)(stopTimeUs - startTimeUs)) / ((double)(1000)); +} + + +int parseSize(const char* str, size_t* output) { + char* next; + *output = strtoull(str, &next, 0); + int l = strlen(str); + if (l) { + char c = str[l - 1]; // last char. + if ((c == 'k') || (c == 'K')) { + *output *= 1024; + } + if ((c == 'm') || (c == 'M')) { + *output *= (1024 * 1024); + } + if ((c == 'g') || (c == 'G')) { + *output *= (1024 * 1024 * 1024); + } + } + return 1; +} + + +int parseUInt(const char* str, unsigned int* output) { + char* next; + *output = strtoul(str, &next, 0); + return !strlen(next); +} + + +int parseInt(const char* str, int* output) { + char* next; + *output = strtol(str, &next, 0); + return !strlen(next); +} + + +int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) { + int extraArgs = 1; + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + + if (!strcmp(arg, " ")) { + // skip NULL args. 
+ } else if (!strcmp(arg, "--N") || (!strcmp(arg, "-N"))) { + if (++i >= argc || !HipTest::parseSize(argv[i], &N)) { + failed("Bad N size argument"); + } + } else if (!strcmp(arg, "--threadsPerBlock")) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &threadsPerBlock)) { + failed("Bad threadsPerBlock argument"); + } + } else if (!strcmp(arg, "--blocksPerCU")) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &blocksPerCU)) { + failed("Bad blocksPerCU argument"); + } + } else if (!strcmp(arg, "--memsetval")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetval argument"); + } + memsetval = ex; + } else if (!strcmp(arg, "--memsetD32val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD32val argument"); + } + memsetD32val = ex; + } else if (!strcmp(arg, "--memsetD16val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD16val argument"); + } + memsetD16val = ex; + } else if (!strcmp(arg, "--memsetD8val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD8val argument"); + } + memsetD8val = ex; + } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { + failed("Bad iterations argument"); + } + + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { + failed("Bad gpuDevice argument"); + } + + } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) { + failed("Bad verbose argument"); + } + } else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) { + failed("Bad tests argument"); + } + + } else { + if (failOnUndefinedArg) { + failed("Bad argument '%s'", arg); + } else { + argv[extraArgs++] = 
argv[i]; + } + } + }; + + return extraArgs; +} + + +unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) { + int device; + HIPCHECK(hipGetDevice(&device)); + hipDeviceProp_t props; + HIPCHECK(hipGetDeviceProperties(&props, device)); + + unsigned blocks = props.multiProcessorCount * blocksPerCU; + if (blocks * threadsPerBlock > N) { + blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + } + + return blocks; +} + + +} // namespace HipTest diff --git a/tests/unit/test_common.h b/tests/unit/test_common.h new file mode 100644 index 0000000000..4b55c70164 --- /dev/null +++ b/tests/unit/test_common.h @@ -0,0 +1,474 @@ +/* +Copyright (c) 2020-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* + * File is intended to be C and CPP compliant, hence any CPP-specific changes + * should be added into the CPP section + * + */ + +#ifdef __cplusplus + #include + #include + #if __CUDACC__ + #include + #else + #include + #endif +#endif + +// ************************ GCC section ************************** +#include + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" + +#define HC __attribute__((hc)) + + +#define KNRM "\x1B[0m" +#define KRED "\x1B[31m" +#define KGRN "\x1B[32m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" +#define KWHT "\x1B[37m" + +#define passed() \ + printf("%sPASSED!%s\n", KGRN, KNRM); \ + +#define failed(...) \ + printf("%serror: ", KRED); \ + printf(__VA_ARGS__); \ + printf("%s\n",KNRM); \ + return false; + +#define warn(...) \ + printf("%swarn: ", KYEL); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + printf("warn: TEST WARNING\n%s", KNRM); + +#define skipped() printf("%sSkipped subtest %s%s\n",KYEL,__FUNCTION__,KNRM); + +#define HIPCHECK(error) \ + { \ + hipError_t localError = error; \ + if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ + failed("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ + localError, #error, __FUNCTION__, __LINE__, KNRM); \ + } \ + } + +#define HIPASSERT(condition) \ + if (!(condition)) { \ + failed("%sassertion %s at %s:%d%s \n", KRED, #condition, __FUNCTION__, __LINE__, KNRM); \ + } + + +#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \ + { \ + hipError_t _e = (API_CALL); \ + if (_e != (EXPECTED_ERROR)) { \ + failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", KRED, \ + #API_CALL, _e, hipGetErrorName(_e), EXPECTED_ERROR, \ + hipGetErrorName(EXPECTED_ERROR), __FILE__, __LINE__, KNRM); \ + } \ + } + +#ifdef _WIN64 +#include +#define aligned_alloc(x,y) _aligned_malloc(y,x) +#define aligned_free(x) _aligned_free(x) +#define popen(x,y) _popen(x,y) +#define
pclose(x) _pclose(x) +#define setenv(x,y,z) _putenv_s(x,y) +#define unsetenv _putenv +#define fileno(x) _fileno(x) +#define dup(x) _dup(x) +#define dup2(x,y) _dup2(x,y) +#define close(x) _close(x) +#else +#define aligned_free(x) free(x) +#endif + +// standard command-line variables: +extern size_t N; +extern char memsetval; +extern int memsetD32val; +extern short memsetD16val; +extern char memsetD8val; +extern int iterations; +extern unsigned blocksPerCU; +extern unsigned threadsPerBlock; +extern int p_gpuDevice; +extern unsigned p_verbose; +extern int p_tests; +extern const char* HIP_VISIBLE_DEVICES_STR; +extern const char* CUDA_VISIBLE_DEVICES_STR; +extern const char* PATH_SEPERATOR_STR; +extern const char* NULL_DEVICE; + +// ********************* CPP section ********************* +#ifdef __cplusplus + +#ifdef __HIP_PLATFORM_HCC +#define TYPENAME(T) typeid(T).name() +#else +#define TYPENAME(T) "?" +#endif + +namespace HipTest { + +// Returns the current system time in microseconds +inline long long get_time() { +#if __CUDACC__ + struct timeval tv; + gettimeofday(&tv, 0); + return (tv.tv_sec * 1000000) + tv.tv_usec; +#else + return std::chrono::high_resolution_clock::now().time_since_epoch() + /std::chrono::microseconds(1); +#endif +} + +double elapsed_time(long long startTimeUs, long long stopTimeUs); + +int parseSize(const char* str, size_t* output); +int parseUInt(const char* str, unsigned int* output); +int parseInt(const char* str, int* output); +int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg); + +unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N); + +template // pointer type +bool checkArray(T hData, T hOutputData, size_t width, size_t height,size_t depth) +{ + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + int offset = i*width*height + j*width + k; + if (hData[offset] != hOutputData[offset]) { + std::cerr << '[' << i << ',' << j << ',' << k 
<< "]:" << hData[offset] << "----" << hOutputData[offset]<<" "; + failed("mistmatch at:%d %d %d",i,j,k); + } + } + } + } + return true; +} + +template +bool checkArray(T input, T output, size_t height, size_t width) +{ + for(int i=0; i +__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NELEM; i += stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + + +template +__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, + size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + + +template +__global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + // Deliberately do this in an inefficient way to increase kernel runtime + for (int i = 0; i < count; i++) { + for (size_t i = offset; i < NELEM; i += stride) { + C_d[i] = A_d[i] + (T)count; + } + } +} + + +template +__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + // Deliberately do this in an inefficient way to increase kernel runtime + for (int i = 0; i < count; i++) { + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = A_d[i] + (T)count; + } + } +} + + +template +__global__ void memsetReverse(T* C_d, T val, int64_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = val; + } +} + + +template +void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) { + // Initialize the 
host data: + for (size_t i = 0; i < numElements; i++) { + if (A_h) (A_h)[i] = 3.146f + i; // Pi + if (B_h) (B_h)[i] = 1.618f + i; // Phi + if (C_h) (C_h)[i] = 0.0f + i; + } +} + + +template +bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) { + size_t Nbytes = N * sizeof(T); + + if (usePinnedHost) { + if (A_h) { + HIPCHECK(hipHostMalloc((void**)A_h, Nbytes)); + } + if (B_h) { + HIPCHECK(hipHostMalloc((void**)B_h, Nbytes)); + } + if (C_h) { + HIPCHECK(hipHostMalloc((void**)C_h, Nbytes)); + } + } else { + if (A_h) { + *A_h = (T*)malloc(Nbytes); + HIPASSERT(*A_h != NULL); + } + + if (B_h) { + *B_h = (T*)malloc(Nbytes); + HIPASSERT(*B_h != NULL); + } + + if (C_h) { + *C_h = (T*)malloc(Nbytes); + HIPASSERT(*C_h != NULL); + } + } + + setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL); + return true; +} + + +template +bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N, + bool usePinnedHost = false) { + size_t Nbytes = N * sizeof(T); + + if (A_d) { + HIPCHECK(hipMalloc(A_d, Nbytes)); + } + if (B_d) { + HIPCHECK(hipMalloc(B_d, Nbytes)); + } + if (C_d) { + HIPCHECK(hipMalloc(C_d, Nbytes)); + } + + return initArraysForHost(A_h, B_h, C_h, N, usePinnedHost); +} + + +template +bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) { + if (usePinnedHost) { + if (A_h) { + HIPCHECK(hipHostFree(A_h)); + } + if (B_h) { + HIPCHECK(hipHostFree(B_h)); + } + if (C_h) { + HIPCHECK(hipHostFree(C_h)); + } + } else { + if (A_h) { + free(A_h); + } + if (B_h) { + free(B_h); + } + if (C_h) { + free(C_h); + } + } + return true; +} + +template +bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) { + if (A_d) { + HIPCHECK(hipFree(A_d)); + } + if (B_d) { + HIPCHECK(hipFree(B_d)); + } + if (C_d) { + HIPCHECK(hipFree(C_d)); + } + + return freeArraysForHost(A_h, B_h, C_h, usePinnedHost); +} + +#if defined(__HIP_PLATFORM_HCC__) +template +bool initArrays2DPitch(T** A_d, 
T** B_d, T** C_d, size_t* pitch_A, size_t* pitch_B, size_t* pitch_C, + size_t numW, size_t numH) { + if (A_d) { + HIPCHECK(hipMallocPitch((void**)A_d, pitch_A, numW * sizeof(T), numH)); + } + if (B_d) { + HIPCHECK(hipMallocPitch((void**)B_d, pitch_B, numW * sizeof(T), numH)); + } + if (C_d) { + HIPCHECK(hipMallocPitch((void**)C_d, pitch_C, numW * sizeof(T), numH)); + } + + HIPASSERT(*pitch_A == *pitch_B); + HIPASSERT(*pitch_A == *pitch_C) + return true; +} + +inline bool initHIPArrays(hipArray** A_d, hipArray** B_d, hipArray** C_d, + const hipChannelFormatDesc* desc, const size_t numW, const size_t numH, + const unsigned int flags) { + if (A_d) { + HIPCHECK(hipMallocArray(A_d, desc, numW, numH, flags)); + } + if (B_d) { + HIPCHECK(hipMallocArray(B_d, desc, numW, numH, flags)); + } + if (C_d) { + HIPCHECK(hipMallocArray(C_d, desc, numW, numH, flags)); + } + return true; +} +#endif + +// Assumes C_h contains vector add of A_h + B_h +// Calls the test "failed" macro if a mismatch is detected. 
+template +size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true, + bool reportMismatch = true) { + size_t mismatchCount = 0; + size_t firstMismatch = 0; + size_t mismatchesToPrint = 10; + for (size_t i = 0; i < N; i++) { + T expected = A_h[i] + B_h[i]; + if (result_H[i] != expected) { + if (mismatchCount == 0) { + firstMismatch = i; + } + mismatchCount++; + if ((mismatchCount <= mismatchesToPrint) && expectMatch) { + std::cout << std::fixed << std::setprecision(32); + std::cout << "At " << i << std::endl; + std::cout << " Computed:" << result_H[i] << std::endl; + std::cout << " Expected:" << expected << std::endl; + } + } + } + + if (reportMismatch) { + if (expectMatch) { + if (mismatchCount) { + failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + } + } else { + if (mismatchCount == 0) { + failed("expected mismatches but did not detect any!"); + } + } + } + + return mismatchCount; +} + + +// Compares result_H against expected_H element by element. +// Calls the test "failed" macro only if mismatches were expected but none was detected.
+template +bool checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) { + size_t mismatchCount = 0; + size_t firstMismatch = 0; + size_t mismatchesToPrint = 10; + for (size_t i = 0; i < N; i++) { + if (result_H[i] != expected_H[i]) { + if (mismatchCount == 0) { + firstMismatch = i; + } + mismatchCount++; + if ((mismatchCount <= mismatchesToPrint) && expectMatch) { + std::cout << std::fixed << std::setprecision(32); + std::cout << "At " << i << std::endl; + std::cout << " Computed:" << result_H[i] << std::endl; + std::cout << " Expected:" << expected_H[i] << std::endl; + } + } + } + + if (expectMatch) { + if (mismatchCount) { + fprintf(stderr, "%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + // failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + } + } else { + if (mismatchCount == 0) { + failed("expected mismatches but did not detect any!"); + } + } + return true; +} + +}; // namespace HipTest +#endif //__cplusplus From 386a0e0123d67b95b4c0ebb3ebcf1d1615758146 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Fri, 17 Apr 2020 01:03:24 -0400 Subject: [PATCH 095/132] disable printf on hip-clang on Windows (#2021) --- include/hip/hcc_detail/device_functions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 0a775df275..4e07349e85 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -34,7 +34,7 @@ THE SOFTWARE. 
#include #include -#if __HIP_CLANG_ONLY__ && __HIP_VDI__ +#if __HIP_CLANG_ONLY__ && __HIP_VDI__ && !_WIN32 extern "C" __device__ int printf(const char *fmt, ...); #else #if HC_FEATURE_PRINTF From f246761deebdf92254852e48580804f1868e0d29 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Fri, 17 Apr 2020 10:13:08 -0500 Subject: [PATCH 096/132] Revert "Solve issues with hip-vdi runtime static lib" This reverts commit 4c2ab3f41e8f5de16e16ee42c708a26cb166da3d. Reason for revert: It is breaking the dkms-no-npi-hipclang build. Maintaining the dkms-no-npi-hipclang build is the top priority; otherwise we lose track of regression analysis. So revert the change for now and recommit it after fixing it. Change-Id: Ia5136e888baecb6148c6c18eedbf37066fcb1eaa --- bin/hipcc | 14 ++++---------- samples/0_Intro/bit_extract/Makefile | 7 +------ samples/0_Intro/square/Makefile | 7 ++----- vdi/CMakeLists.txt | 23 +++++------------------ vdi/hip_internal.hpp | 6 ------ vdi/hip_platform.cpp | 2 +- 6 files changed, 13 insertions(+), 46 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 25b9078cd6..5ed781bc60 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -211,7 +211,10 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; - if ($isWindows) { + if (not $isWindows) { + $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; + $HIPLDFLAGS .= " -lhip_hcc"; + } else { $HIPLDFLAGS .= " -lamdhip64"; } if ($HIP_CLANG_HCC_COMPAT_MODE) { @@ -477,7 +480,6 @@ foreach $arg (@ARGV) { $linkType = 0; $setLinkType = 1; - $swallowArg = 1; } if(($trimarg eq '-use-sharedlib') and ($setLinkType eq 0)) { @@ -768,14 +770,6 @@ if ($HIP_PLATFORM eq "clang") { if (not $isWindows) { $HIPLDFLAGS .= " -lgcc_s -lgcc -lpthread -lm"; } - - if (not $isWindows and not $compileOnly) { - if ($linkType eq 0) { - $toolArgs .= " -L$HIP_LIB_PATH -lamdhip64_static -L$ROCM_PATH/lib -lhsa-runtime64 -ldl "; - } else { - $toolArgs
.= " -Wl,--enable-new-dtags -Wl,--rpath=$HIP_LIB_PATH:$ROCM_PATH/lib -lhip_hcc "; - } - } } diff --git a/samples/0_Intro/bit_extract/Makefile b/samples/0_Intro/bit_extract/Makefile index 4a3a0bb4fe..08bca6e642 100644 --- a/samples/0_Intro/bit_extract/Makefile +++ b/samples/0_Intro/bit_extract/Makefile @@ -13,15 +13,10 @@ ifeq (${HIP_PLATFORM}, nvcc) endif EXE=bit_extract -EXE_STATIC=bit_extract_static $(EXE): bit_extract.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ -$(EXE_STATIC): bit_extract.cpp - $(HIPCC) -use-staticlib $(HIPCC_FLAGS) $< -o $@ - -all: $(EXE) $(EXE_STATIC) clean: - rm -f *.o $(EXE) $(EXE_STATIC) + rm -f *.o $(EXE) diff --git a/samples/0_Intro/square/Makefile b/samples/0_Intro/square/Makefile index aa046eeaaa..9bb0dd8205 100644 --- a/samples/0_Intro/square/Makefile +++ b/samples/0_Intro/square/Makefile @@ -11,7 +11,7 @@ else SOURCES=square.cpp endif -all: square.out square.out.static +all: square.out # Step square.cpp: square.cu @@ -20,8 +20,5 @@ square.cpp: square.cu square.out: $(SOURCES) $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ -square.out.static: $(SOURCES) - $(HIPCC) -use-staticlib $(CXXFLAGS) $(SOURCES) -o $@ - clean: - rm -f *.o *.out *.out.static square.cpp + rm -f *.o *.out square.cpp diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index aa82dec373..8c1ca1f2de 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -152,33 +152,21 @@ add_library(amdhip64 SHARED $ ) -add_library(amdhip64_static_base STATIC +add_library(amdhip64_static STATIC $ ) add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) -target_link_libraries(host INTERFACE amdhip64_static_base) add_library(device INTERFACE) target_link_libraries(device INTERFACE host) -target_link_libraries(amdhip64_static_base PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) -set(STATICLIBNAME "${hip_BINARY_DIR}/lib/libamdhip64_static.a") - 
-add_custom_command( - OUTPUT ${STATICLIBNAME} - COMMAND rm -f ${STATICLIBNAME} - COMMAND ${CMAKE_AR} -rcsT ${STATICLIBNAME} $ $ - DEPENDS amdhip64_static_base amdvdi_static - COMMENT "Combining static libs into ${STATICLIBNAME} " -) - -add_custom_target(amdhip64_static ALL - DEPENDS ${STATICLIBNAME} -) +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhip_hcc.so )" DESTINATION lib COMPONENT MAIN) INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhiprtc.so )" DESTINATION lib COMPONENT MAIN) @@ -186,7 +174,6 @@ INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MA INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) -INSTALL(PROGRAMS ${STATICLIBNAME} DESTINATION ${LIB_INSTALL_DIR}) -INSTALL(TARGETS amdhip64_static_base amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 10819350f5..9b4bd17042 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -132,7 +132,6 @@ namespace hip { extern void init(); extern Device* getCurrentDevice(); - extern void setCurrentDevice(unsigned int index); /// Get VDI queue associated with hipStream @@ -256,11 +255,6 @@ private: ~PlatformState() {} public: static PlatformState& instance() { - if (platform_ == nullptr) { - // __hipRegisterFatBinary() will call this when app starts, thus - // there is no multiple entry issue here. 
- platform_ = new PlatformState(); - } return *platform_; } diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 52ad36c29d..5ece473e06 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -30,7 +30,7 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; -PlatformState* PlatformState::platform_ = nullptr; +PlatformState* PlatformState::platform_ = new PlatformState(); struct __CudaFatBinaryWrapper { unsigned int magic; From bf2e1bbac09a8932f95c17de7665b7e296a7ba5a Mon Sep 17 00:00:00 2001 From: Payam Date: Fri, 17 Apr 2020 13:41:16 -0400 Subject: [PATCH 097/132] Adding License file Change-Id: I9da5c36883a7b167e3bf312c7ed61af207c33c59 --- LICENSE => LICENSE.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) rename LICENSE => LICENSE.txt (85%) diff --git a/LICENSE b/LICENSE.txt similarity index 85% rename from LICENSE rename to LICENSE.txt index 586fbd5a39..e44ba39fd0 100644 --- a/LICENSE +++ b/LICENSE.txt @@ -1,5 +1,4 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2008-2020 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -13,11 +12,9 @@ all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - From da27fd2b09e67285a62624fdce2439bae5c55629 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Mon, 6 Apr 2020 09:58:35 -0400 Subject: [PATCH 098/132] Solve issues with hip-vdi runtime static lib 1.Combine libamdhip64_static_base.a and libamdvdi_static.a into libamdhip64_static.a. 2.Let hipcc use -use-staticlib to link libamdhip64_static.a. 3.Add some samples for static lib. 4.Fix compiling failure of code object. Change-Id: Ic8c95228eb139058da8b5d66ba8439486154ca6f --- bin/hipcc | 14 ++++++++++---- samples/0_Intro/bit_extract/Makefile | 7 ++++++- samples/0_Intro/square/Makefile | 7 +++++-- vdi/CMakeLists.txt | 26 ++++++++++++++++++++++++-- vdi/hip_internal.hpp | 6 ++++++ vdi/hip_platform.cpp | 2 +- 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 5ed781bc60..25b9078cd6 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -211,10 +211,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; - if (not $isWindows) { - $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; - $HIPLDFLAGS .= " -lhip_hcc"; - } else { + if ($isWindows) { $HIPLDFLAGS .= " -lamdhip64"; } if ($HIP_CLANG_HCC_COMPAT_MODE) { @@ -480,6 +477,7 @@ foreach $arg (@ARGV) { $linkType = 0; $setLinkType = 1; + $swallowArg = 1; } if(($trimarg eq '-use-sharedlib') and ($setLinkType eq 0)) { @@ -770,6 +768,14 @@ if ($HIP_PLATFORM eq "clang") { if (not $isWindows) { $HIPLDFLAGS .= " -lgcc_s -lgcc -lpthread -lm"; } + + if (not $isWindows and not $compileOnly) { + if ($linkType eq 0) { + $toolArgs .= " -L$HIP_LIB_PATH -lamdhip64_static -L$ROCM_PATH/lib -lhsa-runtime64 -ldl "; + } else { + $toolArgs .= " -Wl,--enable-new-dtags -Wl,--rpath=$HIP_LIB_PATH:$ROCM_PATH/lib -lhip_hcc "; + } + } } diff --git a/samples/0_Intro/bit_extract/Makefile b/samples/0_Intro/bit_extract/Makefile index 08bca6e642..4a3a0bb4fe 100644 --- a/samples/0_Intro/bit_extract/Makefile +++ 
b/samples/0_Intro/bit_extract/Makefile @@ -13,10 +13,15 @@ ifeq (${HIP_PLATFORM}, nvcc) endif EXE=bit_extract +EXE_STATIC=bit_extract_static $(EXE): bit_extract.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ +$(EXE_STATIC): bit_extract.cpp + $(HIPCC) -use-staticlib $(HIPCC_FLAGS) $< -o $@ + +all: $(EXE) $(EXE_STATIC) clean: - rm -f *.o $(EXE) + rm -f *.o $(EXE) $(EXE_STATIC) diff --git a/samples/0_Intro/square/Makefile b/samples/0_Intro/square/Makefile index 9bb0dd8205..aa046eeaaa 100644 --- a/samples/0_Intro/square/Makefile +++ b/samples/0_Intro/square/Makefile @@ -11,7 +11,7 @@ else SOURCES=square.cpp endif -all: square.out +all: square.out square.out.static # Step square.cpp: square.cu @@ -20,5 +20,8 @@ square.cpp: square.cu square.out: $(SOURCES) $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ +square.out.static: $(SOURCES) + $(HIPCC) -use-staticlib $(CXXFLAGS) $(SOURCES) -o $@ + clean: - rm -f *.o *.out square.cpp + rm -f *.o *.out *.out.static square.cpp diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index 8c1ca1f2de..bfeff521d0 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -156,13 +156,35 @@ add_library(amdhip64_static STATIC $ ) +# We expect amdhip64_static to contain the objects of both vdi and hip, but the linker +# lets amdhip64_static contain the objects of hip only. So we use +# a custom amdhip64_static_combiner to combine the objects of vdi and hip into +# amdhip64_static. To keep amdhip64_static from containing itself, +# amdhip64_static_temp is created internally.
+add_library(amdhip64_static_temp STATIC + $ + ) + add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) add_library(device INTERFACE) target_link_libraries(device INTERFACE host) +# TODO: we may create host_static and device_static to let app +# link amdhip64_static -target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64_static PRIVATE pthread dl) +target_link_libraries(amdhip64_static_temp PRIVATE pthread dl) + +# combine objects of vdi and hip into amdhip64_static +add_custom_target( + amdhip64_static_combiner + ALL + COMMAND rm -f $ # Must remove old one, otherwise the new one will contain obsolete stuff + COMMAND ${CMAKE_AR} -rcsT $ $ $ + DEPENDS amdhip64_static amdhip64_static_temp amdvdi_static # To make sure this is the last step + COMMENT "Combining static libs into amdhip64_static" +) INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) @@ -174,6 +196,6 @@ INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MA INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) -INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(TARGETS amdhip64 amdhip64_static host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 9b4bd17042..10819350f5 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -132,6 +132,7 @@ namespace hip { extern void init(); extern Device* getCurrentDevice(); + extern void setCurrentDevice(unsigned int index); /// Get VDI queue associated with hipStream @@ -255,6 +256,11 @@ private: ~PlatformState() {} public: static PlatformState& instance() { + if (platform_ == nullptr) { + // __hipRegisterFatBinary() will call this when app starts, thus +
// there is no multiple entry issue here. + platform_ = new PlatformState(); + } return *platform_; } diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 5ece473e06..822f68dce6 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -30,7 +30,7 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; -PlatformState* PlatformState::platform_ = new PlatformState(); +PlatformState* PlatformState::platform_; // Initialized to nullptr by default struct __CudaFatBinaryWrapper { unsigned int magic; From 2eb8cc8e90e6e453c2de3e98d2f599b78cc3d76b Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 13 Apr 2020 22:51:46 -0400 Subject: [PATCH 099/132] SWDEV-229480 - Improve error messages in HIP Layer. Change-Id: I054b979d3aa6cf6ed4ca14a9393bdcba757772ff --- vdi/hip_context.cpp | 1 + vdi/hip_memory.cpp | 42 ++++++++++++++++++-- vdi/hip_module.cpp | 11 ++++++ vdi/hip_peer.cpp | 0 vdi/hip_platform.cpp | 39 +++++++++++++++++- vdi/hip_rtc.cpp | 4 ++ vdi/hip_texture.cpp | 94 ++++++++++++++++++-------------------------- 7 files changed, 131 insertions(+), 60 deletions(-) mode change 100644 => 100755 vdi/hip_context.cpp mode change 100644 => 100755 vdi/hip_memory.cpp mode change 100644 => 100755 vdi/hip_peer.cpp mode change 100644 => 100755 vdi/hip_rtc.cpp mode change 100644 => 100755 vdi/hip_texture.cpp diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp old mode 100644 new mode 100755 index 440c3f4b47..1e2ae46fd5 --- a/vdi/hip_context.cpp +++ b/vdi/hip_context.cpp @@ -216,6 +216,7 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { *dev = g_ctxtStack.top(); g_ctxtStack.pop(); } else { + DevLogError("Context Stack empty \n"); HIP_RETURN(hipErrorInvalidContext); } diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp old mode 100644 new mode 100755 index eb56b69bd2..9debd91bf6 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -179,8 +179,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin 
hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); - if (flags != hipDeviceMallocDefault && - flags != hipDeviceMallocFinegrained) { + if (flags != hipDeviceMallocDefault && flags != hipDeviceMallocFinegrained) { HIP_RETURN(hipErrorInvalidValue); } @@ -205,6 +204,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { // can't have both Coherent and NonCoherent flags set at the same time if ((flags & coherentFlags) == coherentFlags) { + DevLogPrintfError("Cannot have both coherent and non-coherent flags " + "at the same time, flags: %u coherent flags: %u \n", + flags, coherentFlags); HIP_RETURN(hipErrorInvalidValue); } @@ -417,16 +419,20 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, amd::Memory* buffer) { const amd::Image::Format imageFormat({channelOrder, channelType}); if (!imageFormat.isValid()) { + DevLogPrintfError("Invalid Image format for channel Order:%u Type:%u \n", + channelOrder, channelType); return nullptr; } amd::Context& context = *hip::getCurrentDevice()->asContext(); if (!imageFormat.isSupported(context, imageType)) { + DevLogPrintfError("Image type: %u not supported \n", imageType); return nullptr; } const std::vector& devices = context.devices(); if (!devices[0]->info().imageSupport_) { + DevLogPrintfError("Device: 0x%x does not support image \n", devices[0]); return nullptr; } @@ -436,6 +442,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, imageHeight, imageDepth, imageArraySize)) { + DevLogError("Image does not have valid dimensions \n"); return nullptr; } @@ -509,6 +516,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, } if (!image->create(nullptr)) { + DevLogPrintfError("Cannot create image: 0x%x \n", image); delete image; return nullptr; } @@ -630,8 +638,7 @@ hipError_t hipMalloc3DArray(hipArray_t* array, hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* 
hostPtr) { HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); - if (flagsPtr == nullptr || - hostPtr == nullptr) { + if (flagsPtr == nullptr || hostPtr == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -657,6 +664,8 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) constexpr bool forceAlloc = true; if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { mem->release(); + DevLogPrintfError("Cannot create memory for size: %u with flags: %d \n", + sizeBytes, flags); HIP_RETURN(hipErrorOutOfMemory); } @@ -703,6 +712,7 @@ hipError_t hipHostUnregister(void* hostPtr) { } } + DevLogPrintfError("Cannot unregister host_ptr: 0x%x \n", hostPtr); HIP_RETURN(hipErrorInvalidValue); } @@ -721,16 +731,20 @@ hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("cannot find symbol 0x%x \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot get global var: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { + DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", + offset, count, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -749,16 +763,20 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, 
&device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { + DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", + offset, count, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -777,16 +795,20 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t co std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { + DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", + offset, count, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -805,16 +827,20 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ if ((offset + count) != sym_size) { + DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", + offset, count, sym_size); return 
HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -1837,12 +1863,16 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { /* Get AMD::Memory object corresponding to this pointer */ amd_mem_obj = getMemoryObject(dev_ptr, offset); if (amd_mem_obj == nullptr) { + DevLogPrintfError("Cannot retrieve amd_mem_obj for dev_ptr: 0x%x with offset: %u \n", + dev_ptr, offset); HIP_RETURN(hipErrorInvalidDevicePointer); } /* Get Device::Memory object pointer */ dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0],false); if (dev_mem_obj == nullptr) { + DevLogPrintfError("Cannot get Device memory for amd_mem_obj: 0x%x dev_ptr: 0x%x offset: %u \n", + amd_mem_obj, dev_ptr, offset); HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -1870,6 +1900,7 @@ hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigne amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); if (amd_mem_obj == nullptr) { + DevLogPrintfError("cannot attach ipc_handle: with ipc_size: %u flags: %u", ihandle->psize, flags); HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -1954,9 +1985,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void } ++device; } + DevLogPrintfError("Cannot find memory object context, memObjCtx: 0x%x \n", + memObjCtx); HIP_RETURN(hipErrorInvalidDevice); } + DevLogPrintfError("Cannot get amd_mem_obj for ptr: 0x%x \n", ptr); HIP_RETURN(hipErrorInvalidValue); } diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp index 3d40d8c967..f523a86a0f 100755 --- a/vdi/hip_module.cpp +++ b/vdi/hip_module.cpp @@ -94,10 +94,12 @@ hipError_t hipModuleUnload(hipModule_t hmod) amd::Program* program = as_amd(reinterpret_cast(hmod)); if(!PlatformState::instance().unregisterFunc(hmod)) { + DevLogPrintfError("Cannot unregister module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidSymbol); } if(!ihipModuleUnregisterGlobal(hmod)) { + DevLogPrintfError("Cannot unregister Global vars for 
module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidSymbol); } @@ -134,6 +136,7 @@ inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* modu = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + DevLogPrintfError("Could not get undefined Variables for Module: 0x%x \n", *module); return false; } @@ -163,6 +166,7 @@ inline bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { // Get all the global func names from COMGR if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + DevLogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", *module); return false; } @@ -180,6 +184,7 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + DevLogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", *module); return false; } @@ -244,6 +249,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); if (!PlatformState::instance().findModFunc(hfunc, hmod, name)) { + DevLogPrintfError("Cannot find the function: %s for module: 0x%x \n", + name, hmod); HIP_RETURN(hipErrorNotFound); } HIP_RETURN(hipSuccess); @@ -256,6 +263,8 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, dptr, bytes)) { + DevLogPrintfError("Cannot find global Var: %s for module: 0x%x at device: %d \n", + name, hmod, ihipGetDevice()); HIP_RETURN(hipErrorNotFound); } @@ -653,6 +662,8 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const /* Get address and size for the global symbol */ if (!PlatformState::instance().getTexRef(name, hmod, 
texRef)) { + DevLogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", + name, hmod); HIP_RETURN(hipErrorNotFound); } diff --git a/vdi/hip_peer.cpp b/vdi/hip_peer.cpp old mode 100644 new mode 100755 diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 822f68dce6..7b63d5225b 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -132,6 +132,8 @@ extern "C" std::vector>* __hipRegisterFatBinary(con { const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + DevLogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", + fbwrapper->magic, fbwrapper->version); return nullptr; } @@ -272,6 +274,7 @@ bool PlatformState::findSymbol(const void *hostVar, std::string &symbolName) { symbolName = it->second; return true; } + DevLogPrintfError("Could not find the Symbol: %s \n", symbolName.c_str()); return false; } @@ -298,6 +301,7 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc const auto it = dev_program->kernels().find(std::string(func_name)); if (it == dev_program->kernels().cend()) { + DevLogPrintfError("Could not find the function %s \n", func_name); return false; } @@ -318,6 +322,7 @@ bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, *var_size = dvar->size; return true; } else { + DevLogPrintfError("Cannot find Var name: %s in module: 0x%x \n", var_name.c_str(), hmod); return false; } } @@ -357,6 +362,7 @@ bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const ch PlatformState::DeviceFunction& devFunc = func_it->second; if (devFunc.functions[ihipGetDevice()] == 0) { if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) { + DevLogPrintfError("Could not create a function: %s at module: 0x%x \n", name, hmod); return false; } } @@ -364,6 +370,7 @@ bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const ch return true; } } + 
DevLogPrintfError("Cannot find module: 0x%x in PlatformState Module Map \n", hmod); return false; } @@ -372,15 +379,22 @@ bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const cha const amd::Symbol* symbol = program->findSymbol(name); if (!symbol) { + DevLogPrintfError("Cannot find Symbol with name: %s \n", name); return false; } amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); if (!kernel) { + DevLogPrintfError("Could not create a new kernel with name: %s \n", name); return false; } hip::Function* f = new hip::Function(kernel); + if (!f) { + DevLogPrintfError("Could not create a new function with name: %s \n", name); + return false; + } + *hfunc = f->asHipFunction(); return true; @@ -398,6 +412,7 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::Program* program = as_amd(reinterpret_cast(module)); program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + DevLogPrintfError("Build error for module: 0x%x at device: %u \n", module, deviceId); return nullptr; } (*devFunc.modules)[deviceId].second = true; @@ -414,6 +429,7 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { } return devFunc.functions[deviceId]; } + DevLogPrintfError("Cannot find function: 0x%x in PlatformState \n", hostFunction); return nullptr; } @@ -425,6 +441,7 @@ bool PlatformState::getFuncAttr(const void* hostFunction, const auto it = functions_.find(hostFunction); if (it == functions_.cend()) { + DevLogPrintfError("Cannot find hostFunction 0x%x \n", hostFunction); return false; } @@ -434,12 +451,15 @@ bool PlatformState::getFuncAttr(const void* hostFunction, /* If module has not been initialized yet, build the kernel now*/ if (!(*devFunc.modules)[deviceId].second) { if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + DevLogPrintfError("Cannot get hostFunction: 0x%x for deviceId:%d \n", 
hostFunction, deviceId); return false; } } amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + DevLogPrintfError("Cannot get Func attributes for function: %s \n", + devFunc.deviceName.c_str()); return false; } return true; @@ -449,10 +469,13 @@ bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureRefe amd::ScopedLock lock(lock_); DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod); if (dvar == nullptr) { + DevLogPrintfError("Cannot find var:%s for creating texture reference at module: 0x%x \n", + hostVar, hmod); return false; } if (!dvar->dyn_undef) { + DevLogPrintfError("HostVar: %s is not created through hipModuleLoad \n", hostVar); return false; } @@ -475,6 +498,7 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + DevLogPrintfError("Build Failure for module: 0x%x \n", hmod); return false; } (*dvar->modules)[deviceId].second = true; @@ -487,13 +511,14 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); } else { - LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + LogError("__hipRegisterVar cannot find kernel for device \n"); } } *size_ptr = dvar->rvars[deviceId].getvarsize(); *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); return true; } else { + DevLogPrintfError("Could not find global var: %s at module:0x%x \n", hostVar, hmod); return false; } } @@ -638,10 +663,12 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) hip::Stream* stream = reinterpret_cast(exec.hStream_); int deviceId = (stream 
!= nullptr)? stream->device->deviceId() : ihipGetDevice(); if (deviceId == -1) { + DevLogPrintfError("Wrong DeviceId: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { + DevLogPrintfError("Could not retrieve hostFunction: 0x%x \n", hostFunction); HIP_RETURN(hipErrorInvalidDeviceFunction); } @@ -663,11 +690,14 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } size_t size = 0; if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, devPtr, &size)) { + DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", + symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -678,11 +708,14 @@ hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { std::string symbolName; if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } hipDeviceptr_t devPtr = nullptr; if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, &devPtr, sizePtr)) { + DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", + symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -701,10 +734,12 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (dev_program == nullptr) { + DevLogPrintfError("Cannot get Device Function for module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidDeviceFunction); } /* Find the global Symbols */ if 
(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + DevLogPrintfError("Cannot create Global Var obj for symbol: %s \n", name); HIP_RETURN(hipErrorInvalidSymbol); } @@ -1107,6 +1142,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, hip::Stream* s = reinterpret_cast(stream); int deviceId = (s != nullptr)? s->device->deviceId() : ihipGetDevice(); if (deviceId == -1) { + DevLogPrintfError("Wrong Device Id: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); @@ -1114,6 +1150,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, #ifdef ATI_OS_LINUX const auto it = hip_impl::functions().find(reinterpret_cast(hostFunction)); if (it == hip_impl::functions().cend()) { + DevLogPrintfError("Cannot find function: 0x%x \n", hostFunction); HIP_RETURN(hipErrorInvalidDeviceFunction); } func = it->second; diff --git a/vdi/hip_rtc.cpp b/vdi/hip_rtc.cpp old mode 100644 new mode 100755 index 9897b98b7f..8c82337405 --- a/vdi/hip_rtc.cpp +++ b/vdi/hip_rtc.cpp @@ -109,6 +109,7 @@ char* demangle(const char* loweredName) { int status = 0; char* demangledName = DEMANGLE(loweredName, nullptr, nullptr, &status); if (status != 0) { + DevLogPrintfError("Cannot demangle loweredName: %s \n", loweredName); return nullptr; } #elif defined(_WIN32) @@ -118,6 +119,8 @@ char* demangle(const char* loweredName) { UNDECORATED_SIZE/ sizeof(*demangledName), UNDNAME_COMPLETE)) { free(demangledName); + DevLogPrintfError("Cannot undecorate loweredName: %s demangledName: %s \n", + loweredName, demangedName); return nullptr; } #else @@ -192,6 +195,7 @@ const char* hiprtcGetErrorString(hiprtcResult x) { case HIPRTC_ERROR_INTERNAL_ERROR: return "HIPRTC_ERROR_INTERNAL_ERROR"; default: + DevLogPrintfError("Invalid HIPRTC error code: %d \n", x); return nullptr; }; diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp old mode 100644 new mode 100755 index b837729721..da24d663d1 --- 
a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -335,8 +335,7 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, texObject); - if ((pResDesc == nullptr) || - (texObject == nullptr)) { + if ((pResDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -349,8 +348,7 @@ hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc hipTextureObject_t texObject) { HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, texObject); - if ((pResViewDesc == nullptr) || - (texObject == nullptr)) { + if ((pResViewDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -363,8 +361,7 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, texObject); - if ((pTexDesc == nullptr) || - (texObject == nullptr)) { + if ((pTexDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -383,8 +380,8 @@ inline bool ihipGetTextureAlignmentOffset(size_t* offset, // If the device memory pointer was returned from hipMalloc(), // the offset is guaranteed to be 0 and NULL may be passed as the offset parameter. 
- if ((alignedOffset != 0) && - (offset == nullptr)) { + if ((alignedOffset != 0) && (offset == nullptr)) { + DevLogPrintfError("Texture object not aligned with offset %u \n", alignedOffset); return false; } @@ -574,8 +571,7 @@ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { HIP_INIT_API(hipGetChannelDesc, desc, array); - if ((desc == nullptr) || - (array == nullptr)) { + if ((desc == nullptr) || (array == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -590,8 +586,7 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref) { HIP_INIT_API(hipGetTextureAlignmentOffset, offset, texref); - if ((offset == nullptr) || - (texref == nullptr)) { + if ((offset == nullptr) || (texref == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -670,13 +665,14 @@ hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetAddressMode, pam, texRef, dim); - if ((pam == nullptr) || - (texRef == nullptr)) { + if ((pam == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } // Currently, the only valid value for dim are 0 and 1. if ((dim != 0) || (dim != 1)) { + DevLogPrintfError("Currently only 2 dimensions (0,1) are valid," + "dim : %d \n", dim); HIP_RETURN(hipErrorInvalidValue); } @@ -695,6 +691,8 @@ hipError_t hipTexRefSetAddressMode(textureReference* texRef, } if ((dim < 0) || (dim > 2)) { + DevLogPrintfError("Currently only 3 dimensions (0,1,2) are valid," + "dim : %d \n", dim); HIP_RETURN(hipErrorInvalidValue); } @@ -708,8 +706,7 @@ hipError_t hipTexRefGetArray(hipArray_t* pArray, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetArray, pArray, texRef); - if ((pArray == nullptr) || - (texRef == nullptr)) { + if ((pArray == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -723,8 +720,9 @@ hipError_t hipTexRefGetArray(hipArray_t* pArray, switch (resDesc.resType) { case hipResourceTypeLinear: case hipResourceTypePitch2D: - case hipResourceTypeMipmappedArray: + case hipResourceTypeMipmappedArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeArray: *pArray = resDesc.res.array.array; break; @@ -738,8 +736,7 @@ hipError_t hipTexRefSetArray(textureReference* texRef, unsigned int flags) { HIP_INIT_API(hipTexRefSetArray, texRef, array, flags); - if ((texRef == nullptr) || - (array == nullptr)) { + if ((texRef == nullptr) || (array == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -769,8 +766,7 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetAddress, dptr, texRef); - if ((dptr == nullptr) || - (texRef == nullptr)) { + if ((dptr == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -778,6 +774,8 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); if (error != hipSuccess) { + DevLogPrintfError("hipGetTextureObjectResourceDesc failed with error code: %s \n", + hipGetErrorName(error)); return HIP_RETURN(error); } @@ -786,8 +784,9 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // If the texture reference is not bound to any device memory range, // return hipErroInvalidValue. 
case hipResourceTypeArray: - case hipResourceTypeMipmappedArray: + case hipResourceTypeMipmappedArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeLinear: *dptr = resDesc.res.linear.devPtr; break; @@ -838,8 +837,7 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, size_t Pitch) { HIP_INIT_API(hipTexRefSetAddress2D, texRef, desc, dptr, Pitch); - if ((texRef == nullptr) || - (desc == nullptr)) { + if ((texRef == nullptr) || (desc == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -870,8 +868,7 @@ hipError_t hipTexRefGetBorderColor(float* pBorderColor, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetBorderColor, pBorderColor, texRef); - if ((pBorderColor == nullptr) || - (texRef == nullptr)) { + if ((pBorderColor == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -887,8 +884,7 @@ hipError_t hipTexRefGetFilterMode(hipTextureFilterMode* pfm, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetFilterMode, pfm, texRef); - if ((pfm == nullptr) || - (texRef == nullptr)) { + if ((pfm == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -902,8 +898,7 @@ hipError_t hipTexRefGetFlags(unsigned int* pFlags, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetFlags, pFlags, texRef); - if ((pFlags == nullptr) || - (texRef == nullptr)) { + if ((pFlags == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -930,8 +925,7 @@ hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetFormat, pFormat, pNumChannels, texRef); - if ((pFormat == nullptr) || - (pNumChannels == nullptr) || + if ((pFormat == nullptr) || (pNumChannels == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -947,8 +941,7 @@ hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMaxAnisotropy, pmaxAnsio, texRef); - if ((pmaxAnsio == nullptr) || - (texRef == nullptr)) { + if ((pmaxAnsio == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -962,8 +955,7 @@ hipError_t hipTexRefGetMipmapFilterMode(hipTextureFilterMode* pfm, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMipmapFilterMode, pfm, texRef); - if ((pfm == nullptr) || - (texRef == nullptr)) { + if ((pfm == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -977,8 +969,7 @@ hipError_t hipTexRefGetMipmapLevelBias(float* pbias, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMipmapLevelBias, pbias, texRef); - if ((pbias == nullptr) || - (texRef == nullptr)) { + if ((pbias == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -993,8 +984,7 @@ hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMipmapLevelClamp, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef); - if ((pminMipmapLevelClamp == nullptr) || - (pmaxMipmapLevelClamp == nullptr) || + if ((pminMipmapLevelClamp == nullptr) || (pmaxMipmapLevelClamp == nullptr) || (texRef == nullptr)){ HIP_RETURN(hipErrorInvalidValue); } @@ -1010,8 +1000,7 @@ hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetMipmappedArray, pArray, &texRef); - if ((pArray == nullptr) || - (texRef == nullptr)) { + if ((pArray == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1025,8 +1014,9 @@ hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, switch (resDesc.resType) { case hipResourceTypeLinear: case hipResourceTypePitch2D: - case hipResourceTypeArray: + case hipResourceTypeArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeMipmappedArray: *pArray = resDesc.res.mipmap.mipmap; break; @@ -1039,8 +1029,7 @@ hipError_t hipTexRefSetBorderColor(textureReference* texRef, float* pBorderColor) { HIP_INIT_API(hipTexRefSetBorderColor, texRef, pBorderColor); - if ((texRef == nullptr) || - (pBorderColor == nullptr)) { + if ((texRef == nullptr) || (pBorderColor == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1110,8 +1099,7 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, unsigned int Flags) { HIP_INIT_API(hipTexRefSetMipmappedArray, texRef, mipmappedArray, Flags); - if ((texRef == nullptr) || - (mipmappedArray == nullptr)) { + if ((texRef == nullptr) || (mipmappedArray == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1142,8 +1130,7 @@ hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { HIP_INIT_API(hipTexObjectCreate, pTexObject, pResDesc, pTexDesc, pResViewDesc); - if ((pTexObject == nullptr) || - (pResDesc == nullptr) || (pTexDesc == nullptr)) { + if ((pTexObject == nullptr) || (pResDesc == nullptr) || (pTexDesc == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1168,8 +1155,7 @@ hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipTexObjectGetResourceDesc, pResDesc, texObject); - if ((pResDesc == nullptr) || - (texObject == nullptr)) { + if ((pResDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1182,8 +1168,7 @@ hipError_t 
hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipTexObjectGetResourceViewDesc, pResViewDesc, texObject); - if ((pResViewDesc == nullptr) || - (texObject == nullptr)) { + if ((pResViewDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1196,8 +1181,7 @@ hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipTexObjectGetTextureDesc, pTexDesc, texObject); - if ((pTexDesc == nullptr) || - (texObject == nullptr)) { + if ((pTexDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } From db70fc66b7cdec8fb749e78f6ecc28b1a2336991 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 17 Apr 2020 10:42:46 -0400 Subject: [PATCH 100/132] SWDEV-231579 - [hipclang-vdi-rocm][perf] - HIPPerfDispatchSpeed disparity between HIP/HCC vs HIP/VDI Insert a wait marker command in the default stream only when HIP has pending operations on other async streams Change-Id: I68660a54867fab7571ba57eb1df5feb1bca1c61a --- vdi/hip_context.cpp | 27 +++++++++++++++++++-- vdi/hip_device.cpp | 2 ++ vdi/hip_device_runtime.cpp | 2 -- vdi/hip_event.cpp | 3 +-- vdi/hip_internal.hpp | 10 ++++---- vdi/hip_memory.cpp | 5 ---- vdi/hip_stream.cpp | 48 +++++++++++++++++++++++++++++++------- 7 files changed, 72 insertions(+), 25 deletions(-) diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp index 1e2ae46fd5..8869bb07ff 100755 --- a/vdi/hip_context.cpp +++ b/vdi/hip_context.cpp @@ -80,13 +80,36 @@ void setCurrentDevice(unsigned int index) { amd::HostQueue* getQueue(hipStream_t stream) { if (stream == nullptr) { - syncStreams(); return getNullStream(); } else { hip::Stream* s = reinterpret_cast(stream); + // Wait for null stream if ((s->flags & hipStreamNonBlocking) == 0) { - getNullStream()->finish(); + amd::HostQueue* nullStream = getNullStream(); + amd::Command::EventWaitList eventWaitList; + + amd::Command* command = 
nullStream->getLastQueuedCommand(true); + if ((command != nullptr) && + // Check the current active status + (command->status() != CL_COMPLETE)) { + eventWaitList.push_back(command); + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0) { + amd::Command* command = new amd::Marker(*s->asHostQueue(), false, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + } + + // Release all active commands. It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } } + return s->asHostQueue(); } } diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index 80e247f37c..c01dd5f195 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -35,6 +35,8 @@ amd::HostQueue* Device::defaultStream() { return nullptr; } } + // Wait for all active streams before executing commands on the default + iHipWaitActiveStreams(defaultStream_); return defaultStream_; } diff --git a/vdi/hip_device_runtime.cpp b/vdi/hip_device_runtime.cpp index febf64d116..4cd6731824 100644 --- a/vdi/hip_device_runtime.cpp +++ b/vdi/hip_device_runtime.cpp @@ -432,8 +432,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { hipError_t hipDeviceSynchronize ( void ) { HIP_INIT_API(hipDeviceSynchronize); - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); if (!queue) { diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp index 677becd67e..0cd061c1e8 100644 --- a/vdi/hip_event.cpp +++ b/vdi/hip_event.cpp @@ -222,8 +222,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { hip::Stream* s = reinterpret_cast(stream); amd::HostQueue* queue = hip::getQueue(stream); - amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ? 
- queue->getLastQueuedCommand(true) : nullptr; + amd::Command* command = queue->getLastQueuedCommand(true); if (command == nullptr) { command = new amd::Marker(*queue, false); diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 10819350f5..0d0caada14 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -143,11 +143,6 @@ namespace hip { extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread extern amd::HostQueue* getNullStream(); - /// Sync Blocking streams on the current device - extern void syncStreams(); - /// Sync blocking streams on the given device - extern void syncStreams(int devId); - struct Function { amd::Kernel* function_; @@ -289,9 +284,12 @@ public: void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); void popExec(ihipExec_t& exec); - }; +/// Wait all active streams on the blocking queue. The method enqueues a wait command and +/// doesn't stall the current thread +extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue); + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 9debd91bf6..4178cea93e 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -52,7 +52,6 @@ hipError_t ihipFree(void *ptr) if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); return hipSuccess; @@ -240,7 +239,6 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); - hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); } @@ -289,7 +287,6 @@ hipError_t ihipArrayDestroy(hipArray* array) { if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } 
as_amd(memObj)->release(); @@ -691,7 +688,6 @@ hipError_t hipHostUnregister(void* hostPtr) { if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } if (amd::SvmBuffer::malloced(hostPtr)) { @@ -1917,7 +1913,6 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) { amd::Device* device = nullptr; amd::Memory* amd_mem_obj = nullptr; - hip::syncStreams(); hip::getNullStream()->finish(); if (dev_ptr == nullptr) { diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index eac42c0203..aefddef17f 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -42,20 +42,16 @@ class StreamCallback { namespace hip { -void syncStreams(int devId) { +void syncStreams() { amd::ScopedLock lock(streamSetLock); for (const auto& it : streamSet) { - if (it->device->deviceId() == devId) { + if (it->device->deviceId() == getCurrentDevice()->deviceId()) { it->finish(); } } } -void syncStreams() { - syncStreams(getCurrentDevice()->deviceId()); -} - Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} @@ -89,6 +85,44 @@ void Stream::finish() { }; +void iHipWaitActiveStreams(amd::HostQueue* blocking_queue) { + amd::Command::EventWaitList eventWaitList; + { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + // If it's the current device + if ((it->queue != nullptr) && (&it->queue->device() == &blocking_queue->device()) && + // and it's a blocking streamclan + ((it->flags & hipStreamNonBlocking) == 0) && + // and it's not the current stream + (it->asHostQueue() != blocking_queue)) { + // Get the last valid so command + amd::Command* command = it->asHostQueue()->getLastQueuedCommand(true); + if ((command != nullptr) && + // Check the current active status + (command->status() != CL_COMPLETE)) { + eventWaitList.push_back(command); + } + } + } + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0) { + amd::Command* 
command = new amd::Marker(*blocking_queue, false, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + } + + // Release all active commands. It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } +} + void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { hipError_t status = hipSuccess; StreamCallback* cbo = reinterpret_cast(user_data); @@ -270,5 +304,3 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback HIP_RETURN(hipSuccess); } - - From 16d9fe5e37e2ca94ecb97fe02de9fbe3db452eff Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Mon, 6 Apr 2020 10:57:03 -0400 Subject: [PATCH 101/132] [vdi] Refactor texture/surface reference support. Change-Id: I8014d82aae7139ef5f95e4b50c4fc6da200dbc9d --- include/hip/hcc_detail/host_defines.h | 1 - .../11_texture_driver/tex2dKernel.cpp | 6 +- .../module/hipModuleTexture2dDrv.cpp | 3 - tests/src/runtimeApi/module/tex2d_kernel.cpp | 3 - tests/src/texture/hipBindTex2DPitch.cpp | 3 - tests/src/texture/hipBindTexRef1DFetch.cpp | 3 - .../texture/hipNormalizedFloatValueTex.cpp | 12 -- tests/src/texture/hipTextureRef2D.cpp | 4 +- tests/src/texture/simpleTexture2DLayered.cpp | 3 - tests/src/texture/simpleTexture3D.cpp | 9 -- vdi/hip_hcc.def.in | 2 + vdi/hip_hcc.map.in | 2 + vdi/hip_internal.hpp | 12 ++ vdi/hip_module.cpp | 23 ++- vdi/hip_platform.cpp | 90 ++++++++++-- vdi/hip_texture.cpp | 134 ++++++++++++++++-- 16 files changed, 241 insertions(+), 69 deletions(-) diff --git a/include/hip/hcc_detail/host_defines.h b/include/hip/hcc_detail/host_defines.h index b21946e99f..ad28cc7626 100644 --- a/include/hip/hcc_detail/host_defines.h +++ b/include/hip/hcc_detail/host_defines.h @@ -72,7 +72,6 @@ THE SOFTWARE. 
#define __noinline__ __attribute__((noinline)) #define __forceinline__ inline __attribute__((always_inline)) -#define __hip_pinned_shadow__ __attribute__((hip_pinned_shadow)) #else diff --git a/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp b/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp index 6fd49fdb0f..5831da0e9d 100644 --- a/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp +++ b/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp @@ -21,11 +21,7 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" -#if __HIP__ -__hip_pinned_shadow__ -#else -extern -#endif + texture tex; extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { diff --git a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp index e7c254e9fd..3903acd125 100644 --- a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp +++ b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp @@ -33,9 +33,6 @@ THE SOFTWARE. #define fileName "tex2d_kernel.code" -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex; bool testResult = false; diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp index e744d88776..e52843441b 100644 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -27,9 +27,6 @@ THE SOFTWARE. #include "hip/hip_runtime.h" -#if __HIP__ -__hip_pinned_shadow__ -#endif extern texture tex; extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp index 8c57520c00..6cee22a45d 100644 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ b/tests/src/texture/hipBindTex2DPitch.cpp @@ -28,9 +28,6 @@ THE SOFTWARE. 
#define SIZE_W 12 #define TYPE_t float -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex; // texture object is a kernel argument diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index 2e962fb05d..af79153fe0 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -32,9 +32,6 @@ THE SOFTWARE. #define N 512 -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex; __global__ void kernel(float *out) { diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp index b4aa3e9c05..af33a29d3c 100644 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -42,24 +42,12 @@ static float getNormalizedValue(const float value, return value; } -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texc; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texuc; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texs; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texus; diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index 5573cf6884..5247f81fe0 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -9,9 +9,7 @@ #include #include "test_common.h" -#if __HIP__ -__hip_pinned_shadow__ -#endif + texture tex; __global__ void tex2DKernel(float* outputData, diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp index f4d3aac1e5..8b1bbb64a3 100644 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ b/tests/src/texture/simpleTexture2DLayered.cpp @@ -30,9 +30,6 @@ THE SOFTWARE. 
typedef float T; // Texture reference for 2D Layered texture -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex2DL; __global__ void simpleKernelLayeredArray(T* outputData,int width,int height,int layer) diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp index a494a1a6c0..82f6cf5e99 100644 --- a/tests/src/texture/simpleTexture3D.cpp +++ b/tests/src/texture/simpleTexture3D.cpp @@ -31,19 +31,10 @@ THE SOFTWARE. const char *sampleName = "simpleTexture3D"; // Texture reference for 3D texture -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texf; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texi; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texc; template diff --git a/vdi/hip_hcc.def.in b/vdi/hip_hcc.def.in index 5eaedf6851..d8101a1cb8 100755 --- a/vdi/hip_hcc.def.in +++ b/vdi/hip_hcc.def.in @@ -161,6 +161,8 @@ __hipPushCallConfiguration __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar +__hipRegisterSurface +__hipRegisterTexture __hipUnregisterFatBinary __gnu_h2f_ieee __gnu_f2h_ieee diff --git a/vdi/hip_hcc.map.in b/vdi/hip_hcc.map.in index 98a3479f40..2cd55b5581 100755 --- a/vdi/hip_hcc.map.in +++ b/vdi/hip_hcc.map.in @@ -161,6 +161,8 @@ global: __hipRegisterFatBinary; __hipRegisterFunction; __hipRegisterVar; + __hipRegisterSurface; + __hipRegisterTexture; __hipUnregisterFatBinary; __gnu_h2f_ieee; __gnu_f2h_ieee; diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 0d0caada14..3e09df03d4 100755 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -222,13 +222,22 @@ public: std::vector< std::pair< hipModule_t, bool > >* modules; std::vector functions; }; + enum DeviceVarKind { + DVK_Variable, + DVK_Surface, + DVK_Texture + }; struct DeviceVar { + DeviceVarKind kind; void* shadowVptr; std::string hostVar; size_t size; std::vector< std::pair< hipModule_t, bool > >* modules; std::vector rvars; bool dyn_undef; + int type; // surface/texture type + int norm; // texture has normalized 
output + bool shadowAllocated = false; // shadow ptr is allocated on-demand and needs freeing. }; private: class Module { @@ -278,6 +287,9 @@ public: hipDeviceptr_t* dev_ptr, size_t* size_ptr); bool getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + bool getGlobalVarFromSymbol(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, void** var_addr, size_t* var_size); void setupArgument(const void *arg, size_t size, size_t offset); diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp index f523a86a0f..5d09f88293 100755 --- a/vdi/hip_module.cpp +++ b/vdi/hip_module.cpp @@ -150,8 +150,15 @@ inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* modu = new texture(); memset(tex_hptr, 0x00, sizeof(texture)); - PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, - std::vector{ g_devices.size()}, true }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + reinterpret_cast(tex_hptr), + it->c_str(), + sizeof(*tex_hptr), + modules, + std::vector{g_devices.size()}, + true, + /*type*/ 0, + /*norm*/ 0}; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -194,8 +201,15 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) modules->at(dev) = std::make_pair(*module, true); } - PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, - std::vector{ g_devices.size()}, false }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + nullptr, + it->c_str(), + 0, + modules, + std::vector{g_devices.size()}, + false, + /*type*/ 0, + /*norm*/ 0}; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -673,4 +687,3 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const HIP_RETURN(hipSuccess); } - diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 7b63d5225b..d00974c6bc 100755 --- 
a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include - +#include #include "hip_internal.hpp" #include "platform/program.hpp" #include "platform/runtime.hpp" @@ -220,7 +220,7 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod DeviceVar& dvar = it->second; if ((*dvar.modules)[0].first == hmod) { rmodules = dvar.modules; - if (dvar.dyn_undef) { + if (dvar.shadowAllocated) { texture* tex_hptr = reinterpret_cast *>(dvar.shadowVptr); delete tex_hptr; @@ -474,12 +474,27 @@ bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureRefe return false; } - if (!dvar->dyn_undef) { - DevLogPrintfError("HostVar: %s is not created through hipModuleLoad \n", hostVar); + switch (dvar->kind) { + case PlatformState::DVK_Variable: + // TODO: Need to define a target-specific symbol info to indicate the device + // variable kind, i.e. regular variable, texture or surface. + // Before that, have to assume the specified variable is a texture or + // surface reference variable. + dvar->kind = DVK_Texture; + // FALL THROUGH + case PlatformState::DVK_Texture: + break; + default: + // If it's already used as non-texture variable, bail out. 
return false; } - *texRef = new (dvar->shadowVptr) texture{}; + if (!dvar->shadowVptr) { + dvar->shadowVptr = new texture{}; + dvar->shadowAllocated = true; + } + *texRef = reinterpret_cast(dvar->shadowVptr); + registerVarSym(dvar->shadowVptr, hostVar); return true; } @@ -523,6 +538,18 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t } } +bool PlatformState::getGlobalVarFromSymbol(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + std::string symbolName; + if (!PlatformState::instance().findSymbol(hostVar, symbolName)) { + return false; + } + return PlatformState::instance().getGlobalVar(symbolName.c_str(), + ihipGetDevice(), nullptr, + dev_ptr, size_ptr); +} + void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { auto& arguments = execStack_.top().arguments_; @@ -577,11 +604,56 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { - PlatformState::DeviceVar dvar{var, std::string{ hostVar }, size, modules, - std::vector{g_devices.size()}, false }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + var, + std::string{hostVar}, + size, + modules, + std::vector{g_devices.size()}, + false, + /*type*/ 0, + /*norm*/ 0}; - PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, deviceVar); + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, deviceVar); +} + +extern "C" void __hipRegisterSurface(std::vector>* + modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int ext) { + PlatformState::DeviceVar dvar{PlatformState::DVK_Surface, + var, + std::string{hostVar}, + sizeof(surfaceReference), // Copy whole surfaceReference + modules, + 
std::vector{g_devices.size()}, + false, + type, + /*norm*/ 0}; + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, deviceVar); +} + +extern "C" void __hipRegisterTexture(std::vector>* + modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int norm, int ext) { + PlatformState::DeviceVar dvar{PlatformState::DVK_Texture, + var, + std::string{hostVar}, + sizeof(textureReference), // Copy whole textureReference so far. + modules, + std::vector{g_devices.size()}, + false, + type, + norm}; + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, deviceVar); } extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp index da24d663d1..94026c8e33 100755 --- a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -24,6 +24,9 @@ #include "hip_conversions.hpp" #include "platform/sampler.hpp" +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false); + struct __hip_texture { uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; @@ -473,7 +476,20 @@ hipError_t hipBindTexture2D(size_t* offset, size_t pitch) { HIP_INIT_API(hipBindTexture2D, offset, texref, devPtr, desc, width, height, pitch); - HIP_RETURN(ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch); + if (err != 
hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t ihipBindTextureToArray(const textureReference* texref, @@ -507,7 +523,20 @@ hipError_t hipBindTextureToArray(const textureReference* texref, const hipChannelFormatDesc* desc) { HIP_INIT_API(hipBindTextureToArray, texref, array, desc); - HIP_RETURN(ihipBindTextureToArray(texref, array, desc)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToArray(texref, array, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, @@ -541,7 +570,20 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, const hipChannelFormatDesc* desc) { HIP_INIT_API(hipBindTextureToMipmappedArray, texref, mipmappedArray, desc); - HIP_RETURN(ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipUnbindTexture(const textureReference* texref) { @@ -564,7 +606,20 @@ hipError_t hipBindTexture(size_t* offset, size_t size) { HIP_INIT_API(hipBindTexture, offset, texref, devPtr, desc, size); - HIP_RETURN(ihipBindTexture(offset, texref, devPtr, desc, size)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture(offset, texref, devPtr, desc, size); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, @@ -599,9 +654,12 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol) { HIP_INIT_API(hipGetTextureReference, texref, symbol); - assert(0 && "Unimplemented"); + if (texref == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *texref = reinterpret_cast(symbol); - HIP_RETURN(hipErrorNotSupported); + HIP_RETURN(hipSuccess); } hipError_t hipTexRefSetFormat(textureReference* texRef, @@ -744,6 +802,14 @@ hipError_t hipTexRefSetArray(textureReference* texRef, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. 
// Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -758,7 +824,13 @@ hipError_t hipTexRefSetArray(textureReference* texRef, hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, @@ -808,6 +880,14 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -828,7 +908,13 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, hipTextureDesc texDesc = hip::getTextureDesc(texRef); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexRefSetAddress2D(textureReference* texRef, @@ -841,6 +927,14 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -856,7 +950,13 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, hipTextureDesc texDesc = hip::getTextureDesc(texRef); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { @@ -1107,6 +1207,14 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. 
@@ -1121,7 +1229,13 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, From de4c173c6ebe8b8bcc09ab1a4b311d5523b52886 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Sun, 19 Apr 2020 10:40:47 -0400 Subject: [PATCH 102/132] support hipLaunchParm test with static lib of hip-vdi rt Let hipMalloc() be called in main() so that global variable can be initialized. 
Change-Id: I9aa1f0a0bb4fa0825d10af0b58c843e7b928e9a3 --- tests/src/kernel/hipLaunchParm.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/src/kernel/hipLaunchParm.cpp b/tests/src/kernel/hipLaunchParm.cpp index 23742d7d6c..797bc84ced 100644 --- a/tests/src/kernel/hipLaunchParm.cpp +++ b/tests/src/kernel/hipLaunchParm.cpp @@ -58,12 +58,10 @@ static const int BLOCK_DIM_SIZE = 512; // allocate memory on device and host for result validation static bool *result_d, *result_h; -static hipError_t hipMallocError = hipMalloc((void**)&result_d, - BLOCK_DIM_SIZE*sizeof(bool)); -static hipError_t hipHostMallocError = hipHostMalloc((void**)&result_h, - BLOCK_DIM_SIZE*sizeof(bool)); -static hipError_t hipMemsetError = hipMemset(result_d, - false, BLOCK_DIM_SIZE); + +static hipError_t hipMallocError = hipErrorUnknown; +static hipError_t hipHostMallocError = hipErrorUnknown; +static hipError_t hipMemsetError = hipErrorUnknown; static void ResultValidation() { hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool), @@ -600,6 +598,10 @@ __global__ void vAdd(float* a) {} int main() { + hipMallocError = hipMalloc((void**)&result_d, BLOCK_DIM_SIZE*sizeof(bool)); + hipHostMallocError = hipHostMalloc((void**)&result_h, BLOCK_DIM_SIZE*sizeof(bool)); + hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE); + // Validating memory & initial value, for result_d, result_h HIPASSERT(hipMallocError == hipSuccess); HIPASSERT(hipHostMallocError == hipSuccess); From daf32606db9e714f1d616edcf792181d1eab54a6 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Tue, 21 Apr 2020 14:00:27 -0400 Subject: [PATCH 103/132] Add perfDispatch tests into hip direct_tests Port perfDispatch tests from OpenCL into hip direct_tests Change-Id: I9f8362636e329d2d0a434c4f012ccc46a524c746 --- .../hipPerfBufferCopyRectSpeed.cpp | 281 +++++++++++++++++ .../perfDispatch/hipPerfBufferCopySpeed.cpp | 287 ++++++++++++++++++ .../perfDispatch/hipPerfDispatchSpeed.cpp | 210 +++++++++++++ 
tests/src/Performance/perfDispatch/timer.cpp | 116 +++++++ tests/src/Performance/perfDispatch/timer.h | 28 ++ 5 files changed, 922 insertions(+) create mode 100644 tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp create mode 100644 tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp create mode 100644 tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp create mode 100644 tests/src/Performance/perfDispatch/timer.cpp create mode 100644 tests/src/Performance/perfDispatch/timer.h diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp new file mode 100644 index 0000000000..71d8ebbe0a --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp @@ -0,0 +1,281 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! 
Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status: the check on the next line tests err */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ? 
0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + size_t width = static_cast(sqrt(static_cast(bufSize_))); + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); /* verify the allocation before writing through srcBuffer */ + setData(srcBuffer, bufSize_, 0xd0); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); 
+ } + else if (hostRegister[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy2D(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy2D failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpy2DAsync(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync2D failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + 
char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopyRectSpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp new file mode 100644 index 0000000000..239d47b347 --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp @@ -0,0 +1,287 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, 
unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status: the check on the next line tests err */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ? 
0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); /* verify the allocation before writing through srcBuffer */ + setData(srcBuffer, bufSize_, 0xd0); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[1]) + { + memptr[1] = 
malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpyAsync(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopySpeed[%d]\t(%8d bytes)\ts:%s 
d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + // Verification + void* temp = malloc(bufSize_ + 4096); + void* chkBuf = (void*)(((size_t)temp + 4095) & ~4095); + err = hipMemcpy(chkBuf, dstBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + checkData(chkBuf, bufSize_, 0xd0); + free(temp); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp new file mode 100644 index 0000000000..3d14c7b95f --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define CHAR_BUF_SIZE 512 + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +typedef struct { + unsigned int iterations; + int flushEvery; +} testStruct; + +testStruct testList[] = +{ + { 1, -1}, + { 1, -1}, + { 10, 1}, + { 10, -1}, + { 100, 1}, + { 100, 10}, + { 100, -1}, + { 1000, 1}, + { 1000, 10}, + { 1000, 100}, + { 1000, -1}, + { 10000, 1}, + { 10000, 10}, + { 10000, 100}, + { 10000, 1000}, + { 10000, -1}, + { 100000, 
1}, + { 100000, 10}, + { 100000, 100}, + { 100000, 1000}, + { 100000, 10000}, + { 100000, -1}, +}; + +unsigned int mapTestList[] = {1, 1, 10, 100, 1000, 10000, 100000}; + +__global__ void _dispatchSpeed(float *outBuf) +{ + int i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < 0) + outBuf[i] = 0.0f; +}; + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status: the check on the next line tests err */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + + unsigned int testListSize = sizeof(testList) / sizeof(testStruct); + int numTests = (p_tests == -1) ? (2*2*testListSize - 1) : p_tests; + int test = (p_tests == -1) ? 0 : p_tests; + + float* srcBuffer = NULL; + unsigned int bufSize_ = 64*sizeof(float); + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + + for(;test <= numTests; test++) + { + int openTest = test % testListSize; + bool sleep = false; + bool doWarmup = false; + + if ((test / testListSize) % 2) + { + doWarmup = true; + } + if (test >= (testListSize * 2)) + { + sleep = true; + } + + int threads = (bufSize_ / sizeof(float)); + int threads_per_block = 64; + int blocks = (threads + threads_per_block - 1) / threads_per_block; /* ceiling division; adding the remainder over-counts blocks */ + hipEvent_t start, stop; + + // NULL stream check: + err = hipEventCreate(&start); + CHECK_RESULT(err != hipSuccess, "hipEventCreate failed"); /* check each create; the second call used to overwrite this status */ + err = hipEventCreate(&stop); + CHECK_RESULT(err != hipSuccess, "hipEventCreate failed"); + + if (doWarmup) + { + hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + + CPerfCounter timer; + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < testList[openTest].iterations; i++) + { + hipEventRecord(start, NULL); + hipLaunchKernelGGL(_dispatchSpeed, 
dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + hipEventRecord(stop, NULL); + + if ((testList[openTest].flushEvery > 0) && + (((i + 1) % testList[openTest].flushEvery) == 0)) + { + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + } + } + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + timer.Stop(); + + hipEventDestroy(start); + hipEventDestroy(stop); + double sec = timer.GetElapsedTime(); + + // microseconds per launch + double perf = (1000000.f*sec/testList[openTest].iterations); + const char *waitType; + const char *extraChar; + const char *n; + const char *warmup; + if (sleep) + { + waitType = "sleep"; + extraChar = ""; + n = ""; + } + else + { + waitType = "spin"; + n = "n"; + extraChar = " "; + } + if (doWarmup) + { + warmup = "warmup"; + } + else + { + warmup = ""; + } + + + char buf[256]; + if (testList[openTest].flushEvery > 0) + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches %s%sing every %5d %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, n, testList[openTest].flushEvery, warmup, (float)perf); + } + else + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches (%s%s) %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, extraChar, warmup, (float)perf); + } + printf("%s\n", buf); + } + + hipFree(srcBuffer); + passed(); +} diff --git a/tests/src/Performance/perfDispatch/timer.cpp b/tests/src/Performance/perfDispatch/timer.cpp new file mode 100644 index 0000000000..ea9c6ea1d9 --- /dev/null +++ b/tests/src/Performance/perfDispatch/timer.cpp @@ -0,0 +1,116 @@ +#include "timer.h" + +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define 
VC_EXTRALEAN +#include +#pragma comment(lib, "user32") +#endif + +#ifdef __linux__ +#include +#define NANOSECONDS_PER_SEC 1000000000 +#endif + +CPerfCounter::CPerfCounter() : _clocks(0), _start(0) +{ + +#ifdef _WIN32 + + QueryPerformanceFrequency((LARGE_INTEGER *)&_freq); + +#endif + +#ifdef __linux__ + _freq = NANOSECONDS_PER_SEC; +#endif + +} + +CPerfCounter::~CPerfCounter() +{ + // EMPTY! +} + +void +CPerfCounter::Start(void) +{ + +#ifdef _WIN32 + + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK); + exit(0); + } + QueryPerformanceCounter((LARGE_INTEGER *)&_start); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + _start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + +} + +void +CPerfCounter::Stop(void) +{ + i64 n; + +#ifdef _WIN32 + + if( !_start ) + { + MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK); + exit(0); + } + + QueryPerformanceCounter((LARGE_INTEGER *)&n); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + + n -= _start; + _start = 0; + _clocks += n; +} + +void +CPerfCounter::Reset(void) +{ + +#ifdef _WIN32 + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK); + exit(0); + } +#endif + _clocks = 0; +} + +double +CPerfCounter::GetElapsedTime(void) +{ +#ifdef _WIN32 + if( _start ) { + MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK); + exit(0); + } +#endif + + return (double)_clocks / (double)_freq; + +} diff --git a/tests/src/Performance/perfDispatch/timer.h b/tests/src/Performance/perfDispatch/timer.h new file mode 100644 index 0000000000..28bfeff74b --- /dev/null +++ b/tests/src/Performance/perfDispatch/timer.h @@ -0,0 +1,28 @@ +#ifndef _TIMER_H_ +#define _TIMER_H_ + +#ifdef _WIN32 +typedef __int64 i64 ; +#endif +#ifdef __linux__ +typedef long long i64; +#endif + +class CPerfCounter { + 
+public: + CPerfCounter(); + ~CPerfCounter(); + void Start(void); + void Stop(void); + void Reset(void); + double GetElapsedTime(void); + +private: + + i64 _freq; + i64 _clocks; + i64 _start; +}; + +#endif // _TIMER_H_ From 19f793f1cde19c2fa1283eeb470fa98e01a43ad5 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Thu, 16 Apr 2020 17:18:13 -0400 Subject: [PATCH 104/132] [hip] Generate assertion message in assertion. Change-Id: Ie66f6563e8728fd0e21cf22dcc6619e4a0e5c28d --- include/hip/hcc_detail/device_functions.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 0a775df275..0cef0bafeb 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -1076,6 +1076,8 @@ void __assert_fail(const char * __assertion, unsigned int __line, const char *__function) { + printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line, + __function, __assertion); // Ignore all the args for now. 
__builtin_trap(); } From 6823232b3a063577f5554882058b256e908bf322 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 22 Apr 2020 12:49:56 -0500 Subject: [PATCH 105/132] cleanup Change-Id: Ia0ee0e4cab2ee8eaa9931024681d0db5b2802594 --- vdi/hip_prof_gen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py index 04f92e0a00..2eb10e9ca3 100755 --- a/vdi/hip_prof_gen.py +++ b/vdi/hip_prof_gen.py @@ -78,7 +78,6 @@ def filtr_api_args(args_str): args_str = re.sub(r'\s*$', r'', args_str); args_str = re.sub(r'\s*,\s*', r',', args_str); args_str = re.sub(r'\s+', r' ', args_str); - #args_str = re.sub(r'void \*', r'void* ', args_str); args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); args_str = re.sub(r'(enum|struct) ', '', args_str); return args_str From 793dbf5bd58e58ee65a71f311c2d17a0cf9654ef Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Wed, 22 Apr 2020 15:16:59 -0400 Subject: [PATCH 106/132] Fix hip_get_devices failure in lammps Support hipDeviceAttributeIntegrated in hipDeviceGetAttribute() with hip-vdi rt Change-Id: Ie5ba81222af3554a843c184ae75af7f369a3c24b --- vdi/hip_device_runtime.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vdi/hip_device_runtime.cpp b/vdi/hip_device_runtime.cpp index 4cd6731824..86a1590533 100644 --- a/vdi/hip_device_runtime.cpp +++ b/vdi/hip_device_runtime.cpp @@ -239,6 +239,9 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop.cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeIntegrated: + *pi = prop.integrated; + break; case hipDeviceAttributeMaxTexture1DWidth: *pi = prop.maxTexture1D; break; From 218044577e20e8550718e613a4353aebc01f2e00 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Wed, 22 Apr 2020 16:19:05 -0400 Subject: [PATCH 107/132] [hip] Fix typos. 
Change-Id: I9d85d0e70033d144dbd4d61cb434ffbe023af8c0 --- include/hip/hcc_detail/hip_runtime_api.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 206a2e5835..eb69520894 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -3854,7 +3854,7 @@ static inline hipError_t hipBindTexture( const void *devPtr, size_t size = UINT_MAX) { - return hipBindTexture(offset, tex, devPtr, tex.channelDesc, size); + return hipBindTexture(offset, &tex, devPtr, tex.channelDesc, size); } template @@ -3898,9 +3898,9 @@ static inline hipError_t hipBindTextureToArray( const struct texture &tex, hipArray_const_t array) { - struct cudaChannelFormatDesc desc; + struct hipChannelFormatDesc desc; hipError_t err = hipGetChannelDesc(&desc, array); - return (err == hipSuccess) ? hipBindTextureToArray(tex, array, desc) : err; + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, desc) : err; } template @@ -3924,14 +3924,14 @@ static inline hipError_t hipBindTextureToMipmappedArray( return err; } err = hipGetChannelDesc(&desc, levelArray); - return (err == hipSuccess) ? hipBindTextureToMipmappedArray(tex, mipmappedArray, desc) : err; + return (err == hipSuccess) ? 
hipBindTextureToMipmappedArray(&tex, mipmappedArray, desc) : err; } template static inline hipError_t hipBindTextureToMipmappedArray( const struct texture &tex, hipMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc &desc) + const struct hipChannelFormatDesc &desc) { return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); } From a0b5dfd625d99af7e288629747b40dd057183173 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 23 Apr 2020 21:42:06 +0530 Subject: [PATCH 108/132] Merge in the rocclr based hip runtime (#2032) * Merge master-next changes in master (include vdi development in master branch) --- CMakeLists.txt | 158 +- LICENSE => LICENSE.txt | 7 +- amdocl/CL/cl.h | 1836 +++++++++++++ amdocl/CL/cl_egl.h | 132 + amdocl/CL/cl_ext.h | 1051 +++++++ amdocl/CL/cl_gl.h | 171 ++ amdocl/CL/cl_gl_ext.h | 52 + amdocl/CL/cl_icd.h | 1269 +++++++++ amdocl/CL/cl_platform.h | 1384 ++++++++++ amdocl/CL/cl_version.h | 86 + amdocl/CL/opencl.h | 47 + amdocl/EGL/egl.h | 329 +++ amdocl/EGL/eglext.h | 645 +++++ amdocl/EGL/eglplatform.h | 125 + amdocl/KHR/khrplatform.h | 282 ++ amdocl/cl_common.hpp | 301 ++ amdocl/cl_debugger_amd.h | 694 +++++ amdocl/cl_icd.cpp | 293 ++ amdocl/cl_icd_amd.h | 739 +++++ amdocl/cl_kernel.h | 165 ++ amdocl/cl_profile_amd.h | 189 ++ amdocl/cl_thread_trace_amd.h | 363 +++ amdocl/gl_functions.hpp | 64 + amdocl/icd/loader/icd_dispatch.h | 108 + bin/hipcc | 41 +- bin/hipify-perl | 6 + cmake/FindROCR.cmake | 16 + cmake/FindROCT.cmake | 16 + configure | 0 docs/markdown/hip_profiling.md | 279 ++ hip-config.cmake.in | 4 +- include/hip/hcc_detail/channel_descriptor.h | 6 + include/hip/hcc_detail/driver_types.h | 172 +- .../hip/hcc_detail/functional_grid_launch.hpp | 10 - include/hip/hcc_detail/hip_fp16.h | 2 +- include/hip/hcc_detail/hip_runtime.h | 7 +- include/hip/hcc_detail/hip_runtime_api.h | 531 +++- include/hip/hcc_detail/hip_texture_types.h | 6 +- include/hip/hcc_detail/hiprtc.h | 4 + include/hip/hcc_detail/ockl_image.h 
| 135 + .../hip/hcc_detail/texture_fetch_functions.h | 386 +++ .../hcc_detail/texture_indirect_functions.h | 501 ++++ include/hip/hip_ext.h | 4 +- include/hip/hip_runtime_api.h | 5 +- packaging/hip-samples.txt | 2 +- packaging/hip-vdi.postinst | 29 + packaging/hip-vdi.prerm | 31 + packaging/hip-vdi.txt | 57 + .../hipDispatchLatency/hipDispatchLatency.cpp | 2 +- samples/2_Cookbook/13_occupancy/occupancy.cpp | 8 + samples/2_Cookbook/2_Profiler/Makefile | 53 + .../2_Cookbook/2_Profiler/MatrixTranspose.cpp | 219 ++ samples/2_Cookbook/2_Profiler/Readme.md | 47 + src/h2f.cpp | 6 +- src/hip_clang.cpp | 257 +- src/hip_hcc_internal.h | 24 +- src/hip_module.cpp | 39 +- src/hiprtc.cpp | 4 +- src/program_state.inl | 35 +- tests/hip_tests.txt | 4 +- tests/hit/HIT.cmake | 56 +- tests/src/Negative/memory/hipMemory.cpp | 2 +- .../stream/hipStreamCreateWithFlags.cpp | 2 +- .../complex_loading_behavior.cpp | 2 +- tests/src/gcc/LaunchKernel.c | 18 +- tests/src/gcc/hipMalloc.c | 8 +- tests/src/hiprtc/hiprtcGetLoweredName.cpp | 2 +- tests/src/hiprtc/saxpy.cpp | 4 +- tests/src/hostcall/hipHostcallFuncCall.cpp | 2 +- tests/src/hostcall/hipHostcallPrintThings.cpp | 2 +- tests/src/kernel/hipExtLaunchKernelGGL.cpp | 3 +- tests/src/p2p/hipPeerToPeer_simple.cpp | 6 +- tests/src/printf/hipPrintfAltForms.cpp | 76 + tests/src/printf/hipPrintfBasic.cpp | 275 ++ tests/src/printf/hipPrintfFlags.cpp | 68 + tests/src/printf/hipPrintfManyDevices.cpp | 77 + tests/src/printf/hipPrintfManyWaves.cpp | 301 ++ tests/src/printf/hipPrintfSpecifiers.cpp | 90 + tests/src/printf/hipPrintfStar.cpp | 54 + tests/src/printf/hipPrintfWidthPrecision.cpp | 74 + tests/src/printf/printf_common.h | 94 + tests/src/runtimeApi/event/hipEventIpc.cpp | 2 +- .../memory/hipMemcpyNegetiveTests.cpp | 2 +- .../runtimeApi/memory/hipMemcpyPeerAsync.cpp | 4 +- .../runtimeApi/memory/p2p_copy_coherency.cpp | 13 +- .../module/hipExtModuleLaunchKernel.cpp | 2 +- .../module/hipLaunchCoopMultiKernel.cpp | 4 +- 
.../module/hipLaunchCooperativeKernel.cpp | 2 +- .../module/hipModuleLoadDataMultThreaded.cpp | 2 +- .../module/hipModuleTexture2dDrv.cpp | 5 +- tests/src/runtimeApi/module/tex2d_kernel.cpp | 6 +- ...upancyMaxActiveBlocksPerMultiprocessor.cpp | 2 +- .../stream/hipStreamAddCallbackCatch.cpp | 2 +- tests/src/surface/hipSurfaceObj2D.cpp | 2 +- tests/src/test_common.h | 4 + tests/src/texture/hipBindTex2DPitch.cpp | 2 +- tests/src/texture/hipBindTexRef1DFetch.cpp | 2 +- .../texture/hipNormalizedFloatValueTex.cpp | 121 +- tests/src/texture/hipTex1DFetchCheckModes.cpp | 4 +- tests/src/texture/hipTextureRef2D.cpp | 2 +- tests/src/texture/simpleTexture2DLayered.cpp | 2 +- tests/src/texture/simpleTexture3D.cpp | 51 +- vdi/CMakeLists.txt | 179 ++ vdi/cl_gl.cpp | 2432 +++++++++++++++++ vdi/cl_gl_amd.hpp | 379 +++ vdi/cl_lqdflash_amd.cpp | 310 +++ vdi/cl_lqdflash_amd.h | 58 + vdi/fixme.cpp | 32 + vdi/hip_activity.cpp | 35 + vdi/hip_context.cpp | 373 +++ vdi/hip_conversions.hpp | 903 ++++++ vdi/hip_device.cpp | 256 ++ vdi/hip_device_runtime.cpp | 569 ++++ vdi/hip_error.cpp | 172 ++ vdi/hip_event.cpp | 254 ++ vdi/hip_event.hpp | 68 + vdi/hip_formatting.hpp | 843 ++++++ vdi/hip_hcc.def.in | 251 ++ vdi/hip_hcc.map.in | 261 ++ vdi/hip_hcc.rc | 75 + vdi/hip_intercept.cpp | 56 + vdi/hip_internal.hpp | 297 ++ vdi/hip_memory.cpp | 2188 +++++++++++++++ vdi/hip_module.cpp | 665 +++++ vdi/hip_peer.cpp | 127 + vdi/hip_platform.cpp | 1229 +++++++++ vdi/hip_platform.hpp | 29 + vdi/hip_prof_api.h | 250 ++ vdi/hip_prof_gen.py | 612 +++++ vdi/hip_profile.cpp | 40 + vdi/hip_rtc.cpp | 393 +++ vdi/hip_stream.cpp | 274 ++ vdi/hip_surface.cpp | 37 + vdi/hip_texture.cpp | 1207 ++++++++ vdi/hiprtc_internal.hpp | 65 + vdi/trace_helper.h | 254 ++ 136 files changed, 29756 insertions(+), 307 deletions(-) rename LICENSE => LICENSE.txt (85%) create mode 100644 amdocl/CL/cl.h create mode 100644 amdocl/CL/cl_egl.h create mode 100644 amdocl/CL/cl_ext.h create mode 100644 amdocl/CL/cl_gl.h create mode 100644 
amdocl/CL/cl_gl_ext.h create mode 100644 amdocl/CL/cl_icd.h create mode 100644 amdocl/CL/cl_platform.h create mode 100644 amdocl/CL/cl_version.h create mode 100644 amdocl/CL/opencl.h create mode 100644 amdocl/EGL/egl.h create mode 100644 amdocl/EGL/eglext.h create mode 100644 amdocl/EGL/eglplatform.h create mode 100644 amdocl/KHR/khrplatform.h create mode 100644 amdocl/cl_common.hpp create mode 100644 amdocl/cl_debugger_amd.h create mode 100644 amdocl/cl_icd.cpp create mode 100644 amdocl/cl_icd_amd.h create mode 100644 amdocl/cl_kernel.h create mode 100644 amdocl/cl_profile_amd.h create mode 100644 amdocl/cl_thread_trace_amd.h create mode 100644 amdocl/gl_functions.hpp create mode 100644 amdocl/icd/loader/icd_dispatch.h create mode 100644 cmake/FindROCR.cmake create mode 100644 cmake/FindROCT.cmake create mode 100644 configure create mode 100644 docs/markdown/hip_profiling.md create mode 100644 include/hip/hcc_detail/ockl_image.h create mode 100644 include/hip/hcc_detail/texture_fetch_functions.h create mode 100644 include/hip/hcc_detail/texture_indirect_functions.h create mode 100755 packaging/hip-vdi.postinst create mode 100755 packaging/hip-vdi.prerm create mode 100644 packaging/hip-vdi.txt create mode 100644 samples/2_Cookbook/2_Profiler/Makefile create mode 100644 samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp create mode 100644 samples/2_Cookbook/2_Profiler/Readme.md create mode 100644 tests/src/printf/hipPrintfAltForms.cpp create mode 100644 tests/src/printf/hipPrintfBasic.cpp create mode 100644 tests/src/printf/hipPrintfFlags.cpp create mode 100644 tests/src/printf/hipPrintfManyDevices.cpp create mode 100644 tests/src/printf/hipPrintfManyWaves.cpp create mode 100644 tests/src/printf/hipPrintfSpecifiers.cpp create mode 100644 tests/src/printf/hipPrintfStar.cpp create mode 100644 tests/src/printf/hipPrintfWidthPrecision.cpp create mode 100644 tests/src/printf/printf_common.h create mode 100644 vdi/CMakeLists.txt create mode 100644 vdi/cl_gl.cpp create mode 
100644 vdi/cl_gl_amd.hpp create mode 100644 vdi/cl_lqdflash_amd.cpp create mode 100644 vdi/cl_lqdflash_amd.h create mode 100644 vdi/fixme.cpp create mode 100644 vdi/hip_activity.cpp create mode 100644 vdi/hip_context.cpp create mode 100644 vdi/hip_conversions.hpp create mode 100644 vdi/hip_device.cpp create mode 100644 vdi/hip_device_runtime.cpp create mode 100644 vdi/hip_error.cpp create mode 100644 vdi/hip_event.cpp create mode 100644 vdi/hip_event.hpp create mode 100644 vdi/hip_formatting.hpp create mode 100755 vdi/hip_hcc.def.in create mode 100755 vdi/hip_hcc.map.in create mode 100644 vdi/hip_hcc.rc create mode 100644 vdi/hip_intercept.cpp create mode 100755 vdi/hip_internal.hpp create mode 100644 vdi/hip_memory.cpp create mode 100755 vdi/hip_module.cpp create mode 100644 vdi/hip_peer.cpp create mode 100755 vdi/hip_platform.cpp create mode 100644 vdi/hip_platform.hpp create mode 100644 vdi/hip_prof_api.h create mode 100755 vdi/hip_prof_gen.py create mode 100644 vdi/hip_profile.cpp create mode 100644 vdi/hip_rtc.cpp create mode 100644 vdi/hip_stream.cpp create mode 100644 vdi/hip_surface.cpp create mode 100644 vdi/hip_texture.cpp create mode 100644 vdi/hiprtc_internal.hpp create mode 100644 vdi/trace_helper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b56c47af30..9c64390d18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,15 @@ cmake_minimum_required(VERSION 3.4.3) project(hip) +# sample command for hip-vdi, you'll need to have vdi installed +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi .. +# cmake -DHIP_COMPILER=clang -DHIP_PLATFORM=vdi -DVDI_DIR=/extra/lmoriche/hip-vdi/vdi -DOPENCL_DIR=/extra/lmoriche/clients/lmoriche_opencl_dev2/drivers/opencl/api/opencl -DLIBVDI_STATIC_DIR=/extra/lmoriche/hip-vdi/build/vdi .. 
+ +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +############################# +# Options +############################# +option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) ############################# # Setup config generation @@ -100,12 +110,19 @@ add_to_config(_buildInfo HIP_COMPILER) # Determine HIP_RUNTIME # Either HCC or VDI; default is HCC if(NOT DEFINED ENV{HIP_RUNTIME}) + if(HIP_PLATFORM STREQUAL "hcc") set(HIP_RUNTIME "HCC" CACHE STRING "HIP Runtime") -else() - set(HIP_RUNTIME $ENV{HIP_RUNTIME} CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "vdi") + set(HIP_RUNTIME "VDI" CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "nvcc") + set(HIP_RUNTIME "CUDA" CACHE STRING "HIP Runtime") + endif() endif() add_to_config(_buildInfo HIP_RUNTIME) +if(HIP_PLATFORM STREQUAL "vdi") + set(USE_PROF_API "1") +endif() # If HIP_PLATFORM is hcc, we need HCC_HOME and HSA_PATH to be defined if(HIP_PLATFORM STREQUAL "hcc") @@ -190,12 +207,14 @@ message (STATUS "ROCM Installation path(ROCM_PATH): ${ROCM_PATH}") set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX") if (NOT CPACK_SET_DESTDIR) set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Default installation path of hcc installer package") -endif (CPACK_SET_DESTDIR) +endif (NOT CPACK_SET_DESTDIR) ############################# # Profiling API support ############################# # Generate profiling API macros/structures header +if(HIP_PLATFORM STREQUAL "hcc") +if(USE_PROF_API EQUAL 1) set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_prof_str.h") set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src") @@ -207,7 +226,6 @@ execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") set_property(DIRECTORY APPEND PROPERTY 
CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) # Enable profiling API -if(USE_PROF_API EQUAL 1) find_path(PROF_API_HEADER_DIR prof_protocol.h HINTS ${PROF_API_HEADER_PATH} @@ -224,6 +242,7 @@ if(USE_PROF_API EQUAL 1) MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") endif() endif() +endif() ############################# # Build steps @@ -233,13 +252,48 @@ set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib) set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include) set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) +# Build clang hipify if enabled +if (BUILD_HIPIFY_CLANG) + add_subdirectory(hipify-clang) +endif() + # Build LPL an CA (fat binary generation / fat binary decomposition tools) if # platform is hcc; do this before the ugly hijacking of the compiler, since no # HC code is involved. -if (HIP_PLATFORM STREQUAL "hcc") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) -endif () +#if (HIP_PLATFORM STREQUAL "hcc") +# add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) +#endif () +if(HIP_PLATFORM STREQUAL "vdi") + # Determine HSA_PATH + if(NOT DEFINED HSA_PATH) + if(NOT DEFINED ENV{HSA_PATH}) + set(HSA_PATH "/opt/rocm/hsa" CACHE PATH "Path to which HSA runtime has been installed") + else() + set(HSA_PATH $ENV{HSA_PATH} CACHE PATH "Path to which HSA runtime has been installed") + endif() + endif() + if(IS_ABSOLUTE ${HSA_PATH} AND EXISTS ${HSA_PATH} AND IS_DIRECTORY ${HSA_PATH}) + message(STATUS "Looking for HSA runtime in: " ${HSA_PATH}) + else() + message(FATAL_ERROR "Don't know where to find HSA runtime. 
Please specify absolute path using -DHSA_PATH") + endif() + + include_directories(${PROJECT_SOURCE_DIR}/include) + add_subdirectory(vdi) + file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) + + +# set(VDI_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") + set(HIP_VDI_BUILD_FLAGS "${HIP_VDI_BUILD_FLAGS} -fPIC ${VDI_CXX_FLAGS} -I${HSA_PATH}/include") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_VDI_BUILD_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_VDI_BUILD_FLAGS}") + set(HCC_CXX_FLAGS "-hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ") + set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include") + +endif() + +message(STATUS "\nHSA runtime in: " ${HSA_PATH}) # Build hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") include_directories(${PROJECT_SOURCE_DIR}/include) @@ -300,18 +354,18 @@ if(HIP_PLATFORM STREQUAL "hcc") set_property ( TARGET hip_hcc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) set_property ( TARGET hip_hcc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) - if(HIP_COMPILER STREQUAL "hcc") - target_link_libraries(hip_hcc PRIVATE hc_am) - target_link_libraries(hip_hcc_static PRIVATE hc_am) + target_link_libraries(hip_hcc PRIVATE hc_am) + target_link_libraries(hip_hcc_static PRIVATE hc_am) + + add_library(hiprtc SHARED src/hiprtc.cpp) + target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) + set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) + set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) + + target_include_directories( + hiprtc SYSTEM + PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include) - add_library(hiprtc SHARED src/hiprtc.cpp) - target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) - set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) 
- set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) - target_include_directories( - hiprtc SYSTEM - PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include) - endif() set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden) @@ -349,6 +403,9 @@ if(HIP_PLATFORM STREQUAL "hcc") file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) +endif() # Generate .hipVersion file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo}) @@ -377,13 +434,11 @@ endif() ############################# # Install hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") - if(HIP_COMPILER STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) - else() - install(TARGETS hip_hcc_static hip_hcc DESTINATION lib) - endif() + install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) +endif() - # Install .hipInfo +# Install .hipInfo +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) endif() @@ -406,6 +461,9 @@ endif() if(HIP_PLATFORM STREQUAL "hcc") install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) +elseif( HIP_PLATFORM STREQUAL "vdi") +# install(TARGETS hip_on_vdi host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +endif() include(CMakePackageConfigHelpers) configure_package_config_file( @@ -427,13 +485,12 @@ if(HIP_PLATFORM STREQUAL "hcc") DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} ) -endif() ############################# # Packaging steps ############################# # Package: hip_base -set(BUILD_DIR 
${CMAKE_CURRENT_BINARY_DIR}/packages/hip_base) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-base) configure_file(packaging/hip-base.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) configure_file(packaging/hip-base.postinst ${BUILD_DIR}/postinst @ONLY) configure_file(packaging/hip-base.prerm ${BUILD_DIR}/prerm @ONLY) @@ -447,12 +504,19 @@ add_custom_target(pkg_hip_base COMMAND ${CMAKE_COMMAND} . WORKING_DIRECTORY ${BUILD_DIR} DEPENDS lpl ca) -# Package: hip_hcc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc) -configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) -configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY) -configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY) -add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . +# Packaging needs to wait for hipify-clang to build if it's enabled... +if (BUILD_HIPIFY_CLANG) + add_dependencies(pkg_hip_base hipify-clang) +endif() + +if(HIP_PLATFORM STREQUAL "hcc") + message("HCC Package\n") + # Package: hip_hcc + set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc) + configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) + configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY) + configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY) + add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz COMMAND make package COMMAND cp *.deb ${PROJECT_BINARY_DIR} @@ -460,12 +524,23 @@ add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${BUILD_DIR} DEPENDS hip_hcc hip_hcc_static hiprtc) +else() + set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/vdi) + configure_file(packaging/hip-vdi.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) + configure_file(packaging/hip-vdi.postinst ${BUILD_DIR}/postinst @ONLY) + configure_file(packaging/hip-vdi.prerm ${BUILD_DIR}/prerm @ONLY) + add_custom_target(hip_on_vdi COMMAND ${CMAKE_COMMAND} . 
+ COMMAND rm -rf *.deb *.rpm *.tar.gz + COMMAND make package + COMMAND cp *.deb ${PROJECT_BINARY_DIR} + COMMAND cp *.rpm ${PROJECT_BINARY_DIR} + COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} + WORKING_DIRECTORY ${BUILD_DIR} ) +endif() # Package: hip_nvcc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_nvcc) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-nvcc) configure_file(packaging/hip-nvcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) -configure_file(packaging/hip-nvcc.postinst ${BUILD_DIR}/postinst @ONLY) -configure_file(packaging/hip-nvcc.prerm ${BUILD_DIR}/prerm @ONLY) add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz COMMAND make package @@ -475,7 +550,7 @@ add_custom_target(pkg_hip_nvcc COMMAND ${CMAKE_COMMAND} . WORKING_DIRECTORY ${BUILD_DIR}) # Package: hip_doc -set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_doc) +set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip-doc) configure_file(packaging/hip-doc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) add_custom_target(pkg_hip_doc COMMAND ${CMAKE_COMMAND} . COMMAND rm -rf *.deb *.rpm *.tar.gz @@ -496,6 +571,7 @@ add_custom_target(pkg_hip_samples COMMAND ${CMAKE_COMMAND} . 
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${BUILD_DIR}) + # Package: all if(POLICY CMP0037) cmake_policy(PUSH) @@ -505,10 +581,18 @@ file(GENERATE OUTPUT ${PROJECT_BINARY_DIR}/fixnames CONTENT "pwd; for i in *.deb; do mv \"\$i\" \"\${i/.deb/-amd64.deb}\" ; done for i in *.rpm ; do mv \$i \${i/.rpm/.x86_64.rpm} ; done ") -add_custom_target(package +if(HIP_PLATFORM STREQUAL "hcc") + add_custom_target(package COMMAND bash ${PROJECT_BINARY_DIR}/fixnames WORKING_DIRECTORY ${PROJECT_BINARY_DIR} DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) +elseif(HIP_PLATFORM STREQUAL "vdi") + add_custom_target(package + COMMAND bash ${PROJECT_BINARY_DIR}/fixnames + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS pkg_hip_base hip_on_vdi pkg_hip_nvcc pkg_hip_doc pkg_hip_samples) +endif() + if(POLICY CMP0037) cmake_policy(POP) endif() diff --git a/LICENSE b/LICENSE.txt similarity index 85% rename from LICENSE rename to LICENSE.txt index 586fbd5a39..e44ba39fd0 100644 --- a/LICENSE +++ b/LICENSE.txt @@ -1,5 +1,4 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2008-2020 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -13,11 +12,9 @@ all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - diff --git a/amdocl/CL/cl.h b/amdocl/CL/cl.h new file mode 100644 index 0000000000..cea6dc2405 --- /dev/null +++ b/amdocl/CL/cl.h @@ -0,0 +1,1836 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_device_svm_capabilities; +#endif +typedef cl_bitfield cl_command_queue_properties; +#ifdef CL_VERSION_1_2 +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; +#endif + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_queue_properties; +#endif +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_svm_mem_flags; +#endif +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +#ifdef CL_VERSION_1_2 +typedef cl_bitfield cl_mem_migration_flags; +#endif +typedef cl_uint cl_image_info; +#ifdef CL_VERSION_1_1 +typedef cl_uint cl_buffer_create_type; +#endif 
+typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +#ifdef CL_VERSION_2_0 +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +#endif +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_program_binary_type; +#endif +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +#endif +typedef cl_uint cl_kernel_work_group_info; +#ifdef CL_VERSION_2_1 +typedef cl_uint cl_kernel_sub_group_info; +#endif +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; +#endif +#ifdef CL_EXPERIMENTAL +typedef cl_bitfield cl_device_atomic_capabilities; +typedef cl_uint cl_khronos_vendor_id; +#endif + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +#ifdef CL_VERSION_1_2 + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef CL_VERSION_2_0 +#ifdef __GNUC__ + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ +#endif + union { +#endif + cl_mem buffer; +#ifdef CL_VERSION_2_0 + cl_mem mem_object; + }; +#ifdef _MSC_VER +#pragma warning( pop ) +#endif +#endif +} cl_image_desc; + 
+#endif + +#ifdef CL_VERSION_1_1 + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +#endif + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 
+#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in 
"cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 
0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define 
CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef 
CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 
+#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_pipe_info */ +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 + +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. 
*/ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define 
CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define 
CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +#ifdef CL_VERSION_1_1 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +#endif + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +#ifdef CL_EXPERIMENTAL + +/* 
cl_device_atomic_capabilities - bitfield */ +#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) +#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) +#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) +#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) +#define CL_DEVICE_ATOMIC_SCOPE_ALL_SVM_DEVICES (1 << 6) + +/* cl_device_info */ +#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 +#define CL_DEVICE_OPENCL_C_VERSIONS 0x1066 +#define CL_DEVICE_MAX_WRITE_IMAGE3D_ARGS 0x1067 +#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 +#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 +/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#define CL_DEVICE_OPENCL_C_FEATURES 0x106F + +/* cl_command_type */ +#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E + +#endif + +/* cl_khronos_vendor_id */ +#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * 
param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + 
+extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) 
CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) 
CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef 
CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + 
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs 
*/ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY 
cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * 
src_origin, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint 
num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * 
dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. 
It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL 
+clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/amdocl/CL/cl_egl.h b/amdocl/CL/cl_egl.h new file mode 100644 index 0000000000..bc4d998eb3 --- /dev/null +++ b/amdocl/CL/cl_egl.h @@ -0,0 +1,132 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_EGL_H +#define __OPENCL_CL_EGL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ +#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F +#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D +#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E + +/* Error type for clCreateFromEGLImageKHR */ +#define CL_INVALID_EGL_OBJECT_KHR -1093 +#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 + +/* CLeglImageKHR is an opaque handle to an EGLImage */ +typedef void* CLeglImageKHR; + +/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ +typedef void* CLeglDisplayKHR; + +/* CLeglSyncKHR is an opaque handle to an EGLSync object */ +typedef void* CLeglSyncKHR; + +/* properties passed to clCreateFromEGLImageKHR */ +typedef intptr_t cl_egl_image_properties_khr; + + +#define cl_khr_egl_image 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromEGLImageKHR(cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( + cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * 
event_wait_list, + cl_event * event); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#define cl_khr_egl_event 1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromEGLSyncKHR(cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( + cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_EGL_H */ diff --git a/amdocl/CL/cl_ext.h b/amdocl/CL/cl_ext.h new file mode 100644 index 0000000000..4d6d8c093a --- /dev/null +++ b/amdocl/CL/cl_ext.h @@ -0,0 +1,1051 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. */ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ + +#if CL_TARGET_OPENCL_VERSION <= 110 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. 
The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, + void (* pfn_notify)(cl_mem memobj, void * user_data), + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). + * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. 
+ * + * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + + +/******************************* + * cl_khr_il_program extension * + *******************************/ +#define cl_khr_il_program 1 + +/* New property to clGetDeviceInfo for retrieving supported intermediate + * languages + */ +#define CL_DEVICE_IL_VERSION_KHR 0x105B + +/* New property to clGetProgramInfo for retrieving for retrieving the IL of a + * program + */ +#define CL_PROGRAM_IL_KHR 0x1169 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithILKHR(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret); + +typedef CL_API_ENTRY cl_program +(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context 
context, + const void * il, + size_t length, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +/* Extension: cl_khr_image2d_from_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without + * a copy. The type associated with a 2D image created from a buffer in an + * OpenCL program is image2d_t. Both the sampler and sampler-less read_image + * built-in functions are supported for 2D images and 2D images created from + * a buffer. Similarly, the write_image built-ins are also supported for 2D + * images created from a buffer. + * + * When the 2D image from buffer is created, the client must specify the + * width, height, image format (i.e. channel order and channel data type) + * and optionally the row pitch. + * + * The pitch specified must be a multiple of + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. + * The base address of the buffer must be aligned to + * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. + */ + +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B + + +/************************************** + * cl_khr_initialize_memory extension * + **************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 +#define CL_CONTEXT_TERMINATE_KHR 0x2032 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL +clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define 
CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + + +/***************************************** + * cl_khr_create_command_queue extension * + *****************************************/ +#define cl_khr_create_command_queue 1 + +typedef cl_bitfield cl_queue_properties_khr; + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithPropertiesKHR(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_command_queue +(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ + +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + +/********************************* +* cl_amd_device_memory_flags * +*********************************/ +#define cl_amd_device_memory_flags 1 +#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap + +/* cl_device_info */ +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ + +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define 
CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 +#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 +#define CL_DEVICE_PCIE_ID_AMD 0x4034 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_uchar unused[17]; cl_uchar bus; cl_uchar device; cl_uchar function; } pcie; +} cl_device_topology_amd; + +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +/************************** +* cl_amd_offline_devices * +**************************/ +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F + +/******************************** +* cl_amd_bus_addressable_memory * +********************************/ + +/* cl_mem flag - bitfield */ +#define CL_MEM_BUS_ADDRESSABLE_AMD (1<<30) +#define CL_MEM_EXTERNAL_PHYSICAL_AMD (1<<31) + +#define CL_COMMAND_WAIT_SIGNAL_AMD 0x4080 +#define CL_COMMAND_WRITE_SIGNAL_AMD 0x4081 +#define CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD 0x4082 + +typedef struct _cl_bus_address_amd +{ + cl_ulong surface_bus_address; + cl_ulong marker_bus_address; +} cl_bus_address_amd; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWaitSignalAMD_fn)( cl_command_queue /*command_queue*/, + cl_mem /*mem_object*/, + cl_uint /*value*/, + cl_uint /*num_events*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWriteSignalAMD_fn)( 
cl_command_queue /*command_queue*/, + cl_mem /*mem_object*/, + cl_uint /*value*/, + cl_ulong /*offset*/, + cl_uint /*num_events*/, + const cl_event * /*event_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueMakeBuffersResidentAMD_fn)( cl_command_queue /*command_queue*/, + cl_uint /*num_mem_objs*/, + cl_mem * /*mem_objects*/, + cl_bool /*blocking_make_resident*/, + cl_bus_address_amd * /*bus_addresses*/, + cl_uint /*num_events*/, + const cl_event * /*event_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/********************** +* cl_amd_liquid_flash * +***********************/ +#define cl_amd_liquid_flash 1 + +#define CL_COMMAND_READ_SSG_FILE_AMD 0x4083 +#define CL_COMMAND_WRITE_SSG_FILE_AMD 0x4087 + +#define CL_INVALID_FILE_OBJECT_AMD 0x4084 + +typedef struct _cl_file_amd * cl_file_amd; + +typedef cl_uint cl_file_flags_amd; +#define CL_FILE_READ_ONLY_AMD (1 << 0) +#define CL_FILE_WRITE_ONLY_AMD (1 << 1) +#define CL_FILE_READ_WRITE_AMD (1 << 2) + +typedef cl_uint cl_file_info_amd; +#define CL_FILE_BLOCK_SIZE_AMD 0x4085 +#define CL_FILE_SIZE_AMD 0x4086 + +typedef CL_API_ENTRY cl_file_amd +(CL_API_CALL * clCreateSsgFileObjectAMD_fn)(cl_context /*context*/, + cl_file_flags_amd /*flags*/, + const wchar_t * /*file_name*/, + cl_int * /*errcode_ret*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clGetSsgFileObjectInfoAMD_fn)(cl_file_amd /* file */, + cl_file_info_amd /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clRetainSsgFileObjectAMD_fn)( cl_file_amd /*file*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clReleaseSsgFileObjectAMD_fn)( cl_file_amd /*file*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueReadSsgFileAMD_fn)(cl_command_queue 
/*command_queue*/, + cl_mem /*buffer*/, + cl_bool /*blocking_write*/, + size_t /*buffer_offset*/, + size_t /*cb*/, + cl_file_amd /*file*/, + size_t /*file_offset*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueWriteSsgFileAMD_fn)(cl_command_queue /*command_queue*/, + cl_mem /*buffer*/, + cl_bool /*blocking_read*/, + size_t /*buffer_offset*/, + size_t /*cb*/, + cl_file_amd /*file*/, + size_t /*file_offset*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event * /*event_wait_list*/, + cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/************************* +* cl_amd_copy_buffer_p2p * +**************************/ +#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088 +#define CL_DEVICE_P2P_DEVICES_AMD 0x4089 + +#define cl_amd_copy_buffer_p2p 1 + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/, + cl_mem /*src_buffer*/, + cl_mem /*dst_buffer*/, + size_t /*src_offset*/, + size_t /*dst_offset*/, + size_t /*cb*/, + cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, + cl_event* /*event*/) CL_EXT_SUFFIX__VERSION_1_2; + +/*********************************** +* cl_amd_assembly_program extension * +***********************************/ +#define cl_amd_assembly_program 1 + +typedef CL_API_ENTRY cl_program (CL_API_CALL * clCreateProgramWithAssemblyAMD_fn) ( + cl_context /* context */, + cl_uint /* count */, + const char** /* strings */, + const size_t* /* lengths */, + cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2; + +#ifdef CL_VERSION_2_0 +/******************************** +* cl_amd_planar_yuv * +********************************/ + +/* cl_mem flag - bitfield */ +#define CL_YUV_IMAGE_Y_PLANE_AMD 0x0 +#define CL_YUV_IMAGE_UV_PLANE_AMD 0x1 + +typedef CL_API_ENTRY cl_mem +(CL_API_CALL * clGetPlaneFromImageAMD_fn)(cl_context /*context*/, + cl_mem 
/*mem*/, + cl_uint /*plane*/, + cl_int * /*errcode_ret*/) CL_EXT_SUFFIX__VERSION_2_0; +#endif + +// +/************************** +* cl_amd_command_queue_info * +**************************/ +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E + +/* cl_kernel_exec_info for DVR DOPP texture support */ +#define CL_KERNEL_EXEC_INFO_NEW_VCOP_AMD 0x4120 +#define CL_KERNEL_EXEC_INFO_PFPA_VCOP_AMD 0x4121 + +/************************* +* cl_amd_object_metadata * +**************************/ +#define cl_amd_object_metadata 1 + +typedef size_t cl_key_amd; + +#define CL_INVALID_OBJECT_AMD 0x403A +#define CL_INVALID_KEY_AMD 0x403B +#define CL_PLATFORM_MAX_KEYS_AMD 0x403C + +typedef CL_API_ENTRY cl_key_amd (CL_API_CALL * clCreateKeyAMD_fn)( + cl_platform_id /* platform */, + void (CL_CALLBACK * /* destructor */)( void* /* old_value */), + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * clObjectGetValueForKeyAMD_fn)( + void * /* object */, + cl_key_amd /* key */, + void ** /* ret_val */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * clObjectSetValueForKeyAMD_fn)( + void * /* object */, + cl_key_amd /* key */, + void * /* value */) CL_API_SUFFIX__VERSION_1_1; +// + + +/********************************* +* cl_arm_printf extension +*********************************/ + +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 + + +/*********************************** +* cl_ext_device_fission extension +***********************************/ +#define cl_ext_device_fission 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) 
CL_EXT_SUFFIX__VERSION_1_1; + +typedef cl_ulong cl_device_partition_property_ext; +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevicesEXT(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +/* cl_device_partition_property_ext */ +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + +/* clDeviceGetInfo selectors */ +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 +#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 +#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 +#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + +/* clGetImageInfo enum */ +#define CL_IMAGE_BYTE_PITCH_AMD 0x4059 + +/* error codes */ +#define CL_DEVICE_PARTITION_FAILED_EXT -1057 +#define CL_INVALID_PARTITION_COUNT_EXT -1058 +#define CL_INVALID_PARTITION_NAME_EXT -1059 + +/* CL_AFFINITY_DOMAINs */ +#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 +#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 +#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 +#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 +#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 +#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + +/* cl_device_partition_property_ext list terminators */ +#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + +/*********************************** + * 
cl_ext_migrate_memobject extension definitions + ***********************************/ +#define cl_ext_migrate_memobject 1 + +typedef cl_bitfield cl_mem_migration_flags_ext; + +#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 + +#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ +#define cl_qcom_ext_host_ptr 1 + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + /* Type of external memory allocation. */ + /* Legal values will be defined in layered extensions. */ + cl_uint allocation_type; + + /* Host cache policy for this external memory allocation. 
*/ + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + + +/******************************************* +* cl_qcom_ext_host_ptr_iocoherent extension +********************************************/ + +/* Cache policy specifying io-coherence */ +#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 + + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* ION file descriptor */ + int ion_filedesc; + + /* Host pointer to the ION allocated memory */ + void* ion_hostptr; + +} cl_mem_ion_host_ptr; + + +/********************************* +* cl_qcom_android_native_buffer_host_ptr extension +*********************************/ + +#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 + +typedef struct _cl_mem_android_native_buffer_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. 
*/ + cl_mem_ext_host_ptr ext_host_ptr; + + /* Virtual pointer to the android native buffer */ + void* anb_ptr; + +} cl_mem_android_native_buffer_host_ptr; + + +/****************************************** + * cl_img_yuv_image extension * + ******************************************/ + +/* Image formats used in clCreateImage */ +#define CL_NV21_IMG 0x40D0 +#define CL_YV12_IMG 0x40D1 + + +/****************************************** + * cl_img_cached_allocations extension * + ******************************************/ + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) +#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) + + +/****************************************** + * cl_img_use_gralloc_ptr extension * + ******************************************/ +#define cl_img_use_gralloc_ptr 1 + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 +#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 + +/* Error code from clEnqueueReleaseGrallocObjectsIMG */ +#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + + +/********************************* +* cl_khr_subgroups extension +*********************************/ +#define cl_khr_subgroups 1 + +#if !defined(CL_VERSION_2_1) +/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. 
+ In hindsight, there should have been a khr suffix on this type for + the extension, but keeping it un-suffixed to maintain backwards + compatibility. */ +typedef cl_uint cl_kernel_sub_group_info; +#endif + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + + +/********************************* +* cl_khr_mipmap_image extension +*********************************/ + +/* cl_sampler_properties */ +#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 +#define CL_SAMPLER_LOD_MIN_KHR 0x1156 +#define CL_SAMPLER_LOD_MAX_KHR 0x1157 + + +/********************************* +* cl_khr_priority_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. 
*/ +#define cl_khr_priority_hints 1 + +typedef cl_uint cl_queue_priority_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_PRIORITY_KHR 0x1096 + +/* cl_queue_priority_khr */ +#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) +#define CL_QUEUE_PRIORITY_MED_KHR (1<<1) +#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_throttle_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_throttle_hints 1 + +typedef cl_uint cl_queue_throttle_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_THROTTLE_KHR 0x1097 + +/* cl_queue_throttle_khr */ +#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) +#define CL_QUEUE_THROTTLE_MED_KHR (1<<1) +#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_subgroup_named_barrier +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. 
*/ +#define cl_khr_subgroup_named_barrier 1 + +/* cl_device_info */ +#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 + + +/********************************* +* cl_khr_extended_versioning +*********************************/ + +#define CL_VERSION_MAJOR_BITS_KHR (10) +#define CL_VERSION_MINOR_BITS_KHR (10) +#define CL_VERSION_PATCH_BITS_KHR (12) + +#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) +#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) +#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) + +#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) +#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) +#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) + +#define CL_MAKE_VERSION_KHR(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ + (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ + ((patch) & CL_VERSION_PATCH_MASK_KHR)) + +typedef cl_uint cl_version_khr; + +#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 + +typedef struct _cl_name_version_khr +{ + cl_version_khr version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; +} cl_name_version_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 + +/* cl_device_info */ +#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E +#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F +#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 + + +/********************************** + * cl_arm_import_memory extension * + **********************************/ +#define cl_arm_import_memory 1 + +typedef intptr_t cl_import_properties_arm; + +/* Default 
and valid properties name for cl_arm_import_memory */ +#define CL_IMPORT_TYPE_ARM 0x40B2 + +/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_HOST_ARM 0x40B3 + +/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 + +/* Protected memory property */ +#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 + +/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 + +/* Data consistency with host property */ +#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 + +/* Import memory size value to indicate a size for the whole buffer */ +#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX + +/* This extension adds a new function that allows for direct memory import into + * OpenCL via the clImportMemoryARM function. + * + * Memory imported through this interface will be mapped into the device's page + * tables directly, providing zero copy access. It will never fall back to copy + * operations and aliased buffers. + * + * Types of memory supported for import are specified as additional extension + * strings. + * + * This extension produces cl_mem allocations which are compatible with all other + * users of cl_mem in the standard API. + * + * This extension maps pages with the same properties as the normal buffer creation + * function clCreateBuffer. 
+ */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clImportMemoryARM( cl_context context, + cl_mem_flags flags, + const cl_import_properties_arm *properties, + void *memory, + size_t size, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; + + +/****************************************** + * cl_arm_shared_virtual_memory extension * + ******************************************/ +#define cl_arm_shared_virtual_memory 1 + +/* Used by clGetDeviceInfo */ +#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 + +/* Used by clGetMemObjectInfo */ +#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 + +/* Used by clSetKernelExecInfoARM: */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_SVM_FREE_ARM 0x40BA +#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB +#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC +#define CL_COMMAND_SVM_MAP_ARM 0x40BD +#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE + +/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. 
*/ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) + +/* Flag values used by clSVMAllocARM: */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) +#define CL_MEM_SVM_ATOMICS_ARM (1 << 11) + +typedef cl_bitfield cl_svm_mem_flags_arm; +typedef cl_uint cl_kernel_exec_info_arm; +typedef cl_bitfield cl_device_svm_capabilities_arm; + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAllocARM(cl_context context, + cl_svm_mem_flags_arm flags, + size_t size, + cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFreeARM(cl_context context, + void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFreeARM(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyARM(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillARM(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMapARM(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint 
num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmapARM(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointerARM(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfoARM(cl_kernel kernel, + cl_kernel_exec_info_arm param_name, + size_t param_value_size, + const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; + +/******************************** + * cl_arm_get_core_id extension * + ********************************/ + +#ifdef CL_VERSION_1_2 + +#define cl_arm_get_core_id 1 + +/* Device info property for bitfield of cores present */ +#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF + +#endif /* CL_VERSION_1_2 */ + +/********************************* +* cl_arm_job_slot_selection +*********************************/ + +#define cl_arm_job_slot_selection 1 + +/* cl_device_info */ +#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 + +/* cl_command_queue_properties */ +#define CL_QUEUE_JOB_SLOT_ARM 0x41E1 + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/amdocl/CL/cl_gl.h b/amdocl/CL/cl_gl.h new file mode 100644 index 0000000000..fbdaf62977 --- /dev/null +++ b/amdocl/CL/cl_gl.h @@ -0,0 +1,171 @@ +/********************************************************************************** + * Copyright (c) 2008-2019 The Khronos Group Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#ifdef CL_VERSION_1_2 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +#endif + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#ifdef CL_VERSION_1_2 +#define CL_GL_NUM_SAMPLES 0x2012 +#endif + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem memobj, + cl_gl_object_type * gl_object_type, + cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int 
CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + 
cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/amdocl/CL/cl_gl_ext.h b/amdocl/CL/cl_gl_ext.h new file mode 100644 index 0000000000..c26d31abed --- /dev/null +++ b/amdocl/CL/cl_gl_ext.h @@ -0,0 +1,52 @@ +/********************************************************************************** + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync cl_GLsync, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/amdocl/CL/cl_icd.h b/amdocl/CL/cl_icd.h new file mode 100644 index 0000000000..2be64719b6 --- /dev/null +++ b/amdocl/CL/cl_icd.h @@ -0,0 +1,1269 @@ +/******************************************************************************* + * Copyright (c) 2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef OPENCL_CL_ICD_H +#define OPENCL_CL_ICD_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains pointer type definitions for each of the CL API calls as + * well as a type definition for the dispatch table used by the Khronos ICD + * loader (see cl_khr_icd extension specification for background). + */ + +/* API function pointer definitions */ + +// Platform APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( + cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( + cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( + cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( + cl_device_id device, cl_device_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( + cl_device_id in_device, + const cl_device_partition_property *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( + cl_device_id 
device) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateSubDevices; +typedef void *cl_api_clRetainDevice; +typedef void *cl_api_clReleaseDevice; + +#endif + +// Context APIs +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( + const cl_context_properties *properties, cl_uint num_devices, + const cl_device_id *devices, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( + const cl_context_properties *properties, cl_device_type device_type, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( + cl_context context, cl_context_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue_properties properties, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateCommandQueueWithProperties; + +#endif + 
+typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( + cl_command_queue command_queue, cl_command_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( + cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + const cl_image_desc *image_desc, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateImage; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( + cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, + cl_uint num_entries, cl_image_format *image_formats, + cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( + cl_mem memobj, cl_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( + cl_mem image, cl_image_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + 
+#ifdef CL_VERSION_2_0 /* pipe and SVM entry points: full signatures only when CL_VERSION_2_0 is defined */ + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( + cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( + cl_mem /* pipe */, cl_pipe_info /* param_name */, + size_t /* param_value_size */, void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( + cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, + unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( + cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +#else /* CL_VERSION_2_0 undefined: the same names become opaque void pointer typedefs */ + +typedef void *cl_api_clCreatePipe; +typedef void *cl_api_clGetPipeInfo; +typedef void *cl_api_clSVMAlloc; +typedef void *cl_api_clSVMFree; + +#endif + +// Sampler APIs +typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( + cl_context context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( + cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( + cl_context /* context */, + const cl_sampler_properties * /* sampler_properties */, + cl_int * /*
errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateSamplerWithProperties; + +#endif + +// Program Object APIs +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( + cl_context context, cl_uint count, const char **strings, + const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const size_t *lengths, const unsigned char **binaries, + cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY +cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateProgramWithBuiltInKernels; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_headers, + const cl_program *input_headers, const char **header_include_names, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( +
cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_programs, + const cl_program *input_programs, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCompileProgram; +typedef void *cl_api_clLinkProgram; + +#endif + +#ifdef CL_VERSION_2_2 + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value) CL_API_SUFFIX__VERSION_2_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( + cl_program program, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_2_2; + +#else + +typedef void *cl_api_clSetProgramSpecializationConstant; +typedef void *cl_api_clSetProgramReleaseCallback; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( + cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clUnloadPlatformCompiler; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( + cl_program program, cl_program_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( + cl_program program, cl_device_id device, cl_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( + cl_program program, const char *kernel_name, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( + cl_program program, cl_uint num_kernels,
cl_kernel *kernels, + cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( + cl_kernel kernel, cl_uint arg_index, size_t arg_size, + const void *arg_value) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( + cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( + cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clGetKernelArgInfo; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( + cl_kernel /* in_kernel */, cl_device_id /*in_device*/, + cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, + const void * /*input_value*/, size_t
/*param_value_size*/, + void * /*param_value*/, + size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; /* NOTE(review): this typedef uses the CL_EXT_SUFFIX__ macro family while the other CL 2.0 typedefs in this group use CL_API_SUFFIX__ - confirm this is intended */ + +#else + +typedef void *cl_api_clSetKernelArgSVMPointer; +typedef void *cl_api_clSetKernelExecInfo; +typedef void *cl_api_clGetKernelSubGroupInfoKHR; + +#endif + +// Event Object APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( + cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( + cl_event event, cl_event_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( + cl_event event, cl_profiling_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t
buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueReadBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, /* NOTE(review): named blocking_read although this is the write variant (clEnqueueWriteBuffer above uses blocking_write); parameter names in a function-pointer typedef do not affect the type */ + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueWriteBufferRect; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( + cl_command_queue command_queue, cl_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillBuffer; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + size_t src_offset, size_t dst_offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( + cl_command_queue command_queue,
cl_mem src_buffer, cl_mem dst_buffer, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueCopyBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, + const size_t *origin, const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, + const size_t *origin, const size_t *region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( + cl_command_queue command_queue, cl_mem image, const void *fill_color, + const size_t origin[3], const size_t region[3], + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillImage; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( + cl_command_queue command_queue, cl_mem src_image, cl_mem
dst_buffer, + const size_t *src_origin, const size_t *region, size_t dst_offset, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, + size_t src_offset, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t *origin, const size_t *region, + size_t *image_row_pitch, size_t *image_slice_pitch, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( + cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMigrateMemObjects; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL
*cl_api_clEnqueueNDRangeKernel)( + cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( + cl_command_queue command_queue, cl_kernel kernel, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( + cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), + void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, + const void **args_mem_loc, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void *( + CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( + cl_platform_id platform, + const char *function_name)CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMarkerWithWaitList; +typedef void *cl_api_clEnqueueBarrierWithWaitList; +typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; + +#endif + +// Shared Virtual Memory APIs + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void ** /*
svm_pointers */, + void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void ** /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, + void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clEnqueueSVMFree; +typedef void *cl_api_clEnqueueSVMMemcpy; +typedef void *cl_api_clEnqueueSVMMemFill; +typedef void *cl_api_clEnqueueSVMMap; +typedef void *cl_api_clEnqueueSVMUnmap; + +#endif + +// Deprecated APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( + cl_command_queue
command_queue, cl_command_queue_properties properties, + cl_bool enable, cl_command_queue_properties *old_properties) + CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_row_pitch, + void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_depth, + size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( + cl_command_queue command_queue, + cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( + cl_command_queue command_queue, cl_uint num_events, + const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +// GL and other APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint bufobj, + int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; /* NOTE(review): errcode_ret is declared as int* here but cl_int* in cl_api_clCreateFromGLTexture just below - confirm the two are the same type on every supported platform */ + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; +
+typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( + cl_mem memobj, cl_gl_object_type *gl_object_type, + cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( + cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_gl_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( + const cl_context_properties *properties, cl_gl_context_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +/* cl_khr_gl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( + cl_context context, cl_GLsync 
sync, cl_int *errcode_ret); + +#if defined(_WIN32) + +/* cl_khr_d3d10_sharing */ + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, + ID3D10Buffer 
*resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_d3d11_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint 
num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_dx9_media_sharing */ +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_d3d11_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern 
CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, + ID3D11Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_dx9_media_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, 
cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +#else + +/* cl_khr_d3d10_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; +typedef void *cl_api_clCreateFromD3D10BufferKHR; +typedef void *cl_api_clCreateFromD3D10Texture2DKHR; +typedef void *cl_api_clCreateFromD3D10Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; + +/* cl_khr_d3d11_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; +typedef void *cl_api_clCreateFromD3D11BufferKHR; +typedef void *cl_api_clCreateFromD3D11Texture2DKHR; +typedef void *cl_api_clCreateFromD3D11Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; + +/* cl_khr_dx9_media_sharing */ +typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; +typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; +typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; +typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; + +#endif + +/* OpenCL 1.1 */ + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( + cl_mem /* buffer */, cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( + cl_mem /* memobj */, + void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, + void * /*user_data*/), + void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( + cl_context /* context */, + cl_int * /* 
errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( + cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clSetEventCallback; +typedef void *cl_api_clCreateSubBuffer; +typedef void *cl_api_clSetMemObjectDestructorCallback; +typedef void *cl_api_clCreateUserEvent; +typedef void *cl_api_clSetUserEventStatus; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( + cl_device_id in_device, + const cl_device_partition_property_ext *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_egl_image */ +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( + cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, + cl_mem_flags flags, const cl_egl_image_properties_khr *properties, + cl_int *errcode_ret); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_egl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( + cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, + cl_int *errcode_ret); + +#ifdef CL_VERSION_2_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clSetDefaultDeviceCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( + cl_context context, const void *il, size_t length, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( + cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( + cl_command_queue command_queue, cl_uint num_svm_pointers, + const void **svm_pointers, const size_t *sizes, + cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( + cl_device_id device, cl_ulong *device_timestamp, + cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( + cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#else + +typedef void *cl_api_clSetDefaultDeviceCommandQueue; +typedef void *cl_api_clCreateProgramWithIL; +typedef void *cl_api_clGetKernelSubGroupInfo; +typedef void *cl_api_clCloneKernel; +typedef void *cl_api_clEnqueueSVMMigrateMem; +typedef void *cl_api_clGetDeviceAndHostTimer; +typedef void *cl_api_clGetHostTimer; + +#endif + +/* Vendor dispatch table structure */ + +typedef struct _cl_icd_dispatch { + /* OpenCL 1.0 */ + cl_api_clGetPlatformIDs clGetPlatformIDs; + cl_api_clGetPlatformInfo clGetPlatformInfo; + cl_api_clGetDeviceIDs
clGetDeviceIDs; + cl_api_clGetDeviceInfo clGetDeviceInfo; + cl_api_clCreateContext clCreateContext; + cl_api_clCreateContextFromType clCreateContextFromType; + cl_api_clRetainContext clRetainContext; + cl_api_clReleaseContext clReleaseContext; + cl_api_clGetContextInfo clGetContextInfo; + cl_api_clCreateCommandQueue clCreateCommandQueue; + cl_api_clRetainCommandQueue clRetainCommandQueue; + cl_api_clReleaseCommandQueue clReleaseCommandQueue; + cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; + cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; + cl_api_clCreateBuffer clCreateBuffer; + cl_api_clCreateImage2D clCreateImage2D; + cl_api_clCreateImage3D clCreateImage3D; + cl_api_clRetainMemObject clRetainMemObject; + cl_api_clReleaseMemObject clReleaseMemObject; + cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; + cl_api_clGetMemObjectInfo clGetMemObjectInfo; + cl_api_clGetImageInfo clGetImageInfo; + cl_api_clCreateSampler clCreateSampler; + cl_api_clRetainSampler clRetainSampler; + cl_api_clReleaseSampler clReleaseSampler; + cl_api_clGetSamplerInfo clGetSamplerInfo; + cl_api_clCreateProgramWithSource clCreateProgramWithSource; + cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; + cl_api_clRetainProgram clRetainProgram; + cl_api_clReleaseProgram clReleaseProgram; + cl_api_clBuildProgram clBuildProgram; + cl_api_clUnloadCompiler clUnloadCompiler; + cl_api_clGetProgramInfo clGetProgramInfo; + cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; + cl_api_clCreateKernel clCreateKernel; + cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; + cl_api_clRetainKernel clRetainKernel; + cl_api_clReleaseKernel clReleaseKernel; + cl_api_clSetKernelArg clSetKernelArg; + cl_api_clGetKernelInfo clGetKernelInfo; + cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; + cl_api_clWaitForEvents clWaitForEvents; + cl_api_clGetEventInfo clGetEventInfo; + cl_api_clRetainEvent clRetainEvent; + cl_api_clReleaseEvent clReleaseEvent; + 
cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; + cl_api_clFlush clFlush; + cl_api_clFinish clFinish; + cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; + cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; + cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; + cl_api_clEnqueueReadImage clEnqueueReadImage; + cl_api_clEnqueueWriteImage clEnqueueWriteImage; + cl_api_clEnqueueCopyImage clEnqueueCopyImage; + cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; + cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; + cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; + cl_api_clEnqueueMapImage clEnqueueMapImage; + cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; + cl_api_clEnqueueTask clEnqueueTask; + cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; + cl_api_clEnqueueMarker clEnqueueMarker; + cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; + cl_api_clEnqueueBarrier clEnqueueBarrier; + cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; + cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; + cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; + cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; + cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; + cl_api_clGetGLObjectInfo clGetGLObjectInfo; + cl_api_clGetGLTextureInfo clGetGLTextureInfo; + cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; + cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; + cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; + + /* cl_khr_d3d10_sharing */ + cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; + cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; + cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; + cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; + cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; + cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; 
+ + /* OpenCL 1.1 */ + cl_api_clSetEventCallback clSetEventCallback; + cl_api_clCreateSubBuffer clCreateSubBuffer; + cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; + cl_api_clCreateUserEvent clCreateUserEvent; + cl_api_clSetUserEventStatus clSetUserEventStatus; + cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; + cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; + cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; + + /* cl_ext_device_fission */ + cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; + cl_api_clRetainDeviceEXT clRetainDeviceEXT; + cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; + + /* cl_khr_gl_event */ + cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + cl_api_clCreateSubDevices clCreateSubDevices; + cl_api_clRetainDevice clRetainDevice; + cl_api_clReleaseDevice clReleaseDevice; + cl_api_clCreateImage clCreateImage; + cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; + cl_api_clCompileProgram clCompileProgram; + cl_api_clLinkProgram clLinkProgram; + cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; + cl_api_clGetKernelArgInfo clGetKernelArgInfo; + cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; + cl_api_clEnqueueFillImage clEnqueueFillImage; + cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; + cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; + cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; + cl_api_clGetExtensionFunctionAddressForPlatform + clGetExtensionFunctionAddressForPlatform; + cl_api_clCreateFromGLTexture clCreateFromGLTexture; + + /* cl_khr_d3d11_sharing */ + cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; + cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; + cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; + cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; + cl_api_clCreateFromDX9MediaSurfaceKHR 
clCreateFromDX9MediaSurfaceKHR; + cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; + cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; + + /* cl_khr_dx9_media_sharing */ + cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR + clGetDeviceIDsFromDX9MediaAdapterKHR; + cl_api_clEnqueueAcquireDX9MediaSurfacesKHR + clEnqueueAcquireDX9MediaSurfacesKHR; + cl_api_clEnqueueReleaseDX9MediaSurfacesKHR + clEnqueueReleaseDX9MediaSurfacesKHR; + + /* cl_khr_egl_image */ + cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; + cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; + cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; + + /* cl_khr_egl_event */ + cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; + + /* OpenCL 2.0 */ + cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; + cl_api_clCreatePipe clCreatePipe; + cl_api_clGetPipeInfo clGetPipeInfo; + cl_api_clSVMAlloc clSVMAlloc; + cl_api_clSVMFree clSVMFree; + cl_api_clEnqueueSVMFree clEnqueueSVMFree; + cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; + cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; + cl_api_clEnqueueSVMMap clEnqueueSVMMap; + cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; + cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; + cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; + cl_api_clSetKernelExecInfo clSetKernelExecInfo; + + /* cl_khr_sub_groups */ + cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + cl_api_clCloneKernel clCloneKernel; + cl_api_clCreateProgramWithIL clCreateProgramWithIL; + cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; + cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; + cl_api_clGetHostTimer clGetHostTimer; + cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; + cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + cl_api_clSetProgramReleaseCallback 
clSetProgramReleaseCallback; + cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; +} cl_icd_dispatch; + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/amdocl/CL/cl_platform.h b/amdocl/CL/cl_platform.h new file mode 100644 index 0000000000..7f4ddea5b3 --- /dev/null +++ b/amdocl/CL/cl_platform.h @@ -0,0 +1,1384 @@ +/********************************************************************************** + * Copyright (c) 2008-2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ **********************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include <CL/cl_version.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#define CL_EXTENSION_WEAK_LINK +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXT_SUFFIX__VERSION_1_0 +#define CL_API_SUFFIX__VERSION_1_1 +#define CL_EXT_SUFFIX__VERSION_1_1 +#define CL_API_SUFFIX__VERSION_1_2 +#define CL_EXT_SUFFIX__VERSION_1_2 +#define CL_API_SUFFIX__VERSION_2_0 +#define CL_EXT_SUFFIX__VERSION_2_0 +#define CL_API_SUFFIX__VERSION_2_1 +#define CL_EXT_SUFFIX__VERSION_2_1 +#define CL_API_SUFFIX__VERSION_2_2 +#define CL_EXT_SUFFIX__VERSION_2_2 + + +#ifdef __GNUC__ + #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define
CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 
0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f 
+#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include <stdint.h> + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP
-1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include <stddef.h> + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here.
*/ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <xmmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <emmintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8
__attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include <mmintrin.h> + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define
__CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include <intrin.h> + #else + #include <immintrin.h> + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include <crtdefs.h> */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define
CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif 
+}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4)
s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + 
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + 
__CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ +typedef cl_half4 cl_half3; + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + 
cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + 
__CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( 
__CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior 
to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. 
+ */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/amdocl/CL/cl_version.h b/amdocl/CL/cl_version.h new file mode 100644 index 0000000000..bb766cb9bb --- /dev/null +++ b/amdocl/CL/cl_version.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") +#define CL_TARGET_OPENCL_VERSION 220 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 220 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. 
*/ +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/amdocl/CL/opencl.h b/amdocl/CL/opencl.h new file mode 100644 index 0000000000..143d1d2dc6 --- /dev/null +++ b/amdocl/CL/opencl.h @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2008-2015 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <CL/cl.h> +#include <CL/cl_gl.h> +#include <CL/cl_gl_ext.h> +#include <CL/cl_ext.h> + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ diff --git a/amdocl/EGL/egl.h b/amdocl/EGL/egl.h new file mode 100644 index 0000000000..99ea342a47 --- /dev/null +++ b/amdocl/EGL/egl.h @@ -0,0 +1,329 @@ +/* -*- mode: c; tab-width: 8; -*- */ +/* vi: set sw=4 ts=8: */ +/* Reference version of egl.h for EGL 1.4. + * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ + */ + +/* +** Copyright (c) 2007-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#ifndef __egl_h_ +#define __egl_h_ + +/* All platform-dependent types and macro boilerplate (such as EGLAPI + * and EGLAPIENTRY) should go in eglplatform.h. + */ +#include <EGL/eglplatform.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* EGL Types */ +/* EGLint is defined in eglplatform.h */ +typedef unsigned int EGLBoolean; +typedef unsigned int EGLenum; +typedef void *EGLConfig; +typedef void *EGLContext; +typedef void *EGLDisplay; +typedef void *EGLSurface; +typedef void *EGLClientBuffer; + +/* EGL Versioning */ +#define EGL_VERSION_1_0 1 +#define EGL_VERSION_1_1 1 +#define EGL_VERSION_1_2 1 +#define EGL_VERSION_1_3 1 +#define EGL_VERSION_1_4 1 + +/* EGL Enumerants. Bitmasks and other exceptional cases aside, most + * enums are assigned unique values starting at 0x3000. 
+ */ + +/* EGL aliases */ +#define EGL_FALSE 0 +#define EGL_TRUE 1 + +/* Out-of-band handle values */ +#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) +#define EGL_NO_CONTEXT ((EGLContext)0) +#define EGL_NO_DISPLAY ((EGLDisplay)0) +#define EGL_NO_SURFACE ((EGLSurface)0) + +/* Out-of-band attribute value */ +#define EGL_DONT_CARE ((EGLint)-1) + +/* Errors / GetError return values */ +#define EGL_SUCCESS 0x3000 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_CONTEXT_LOST 0x300E /* EGL 1.1 - IMG_power_management */ + +/* Reserved 0x300F-0x301F for additional errors */ + +/* Config attributes */ +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BLUE_SIZE 0x3022 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_RED_SIZE 0x3024 +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_LEVEL 0x3029 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_NONE 0x3038 /* Attrib list terminator */ +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define 
EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 /* Pseudo-attribute (not queryable) */ +#define EGL_CONFORMANT 0x3042 + +/* Reserved 0x3043-0x304F for additional config attributes */ + +/* Config attribute values */ +#define EGL_SLOW_CONFIG 0x3050 /* EGL_CONFIG_CAVEAT value */ +#define EGL_NON_CONFORMANT_CONFIG 0x3051 /* EGL_CONFIG_CAVEAT value */ +#define EGL_TRANSPARENT_RGB 0x3052 /* EGL_TRANSPARENT_TYPE value */ +#define EGL_RGB_BUFFER 0x308E /* EGL_COLOR_BUFFER_TYPE value */ +#define EGL_LUMINANCE_BUFFER 0x308F /* EGL_COLOR_BUFFER_TYPE value */ + +/* More config attribute values, for EGL_TEXTURE_FORMAT */ +#define EGL_NO_TEXTURE 0x305C +#define EGL_TEXTURE_RGB 0x305D +#define EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_2D 0x305F + +/* Config attribute mask bits */ +#define EGL_PBUFFER_BIT 0x0001 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_PIXMAP_BIT 0x0002 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_WINDOW_BIT 0x0004 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 /* EGL_SURFACE_TYPE mask bits */ +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 /* EGL_SURFACE_TYPE mask bits */ + +#define EGL_OPENGL_ES_BIT 0x0001 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENVG_BIT 0x0002 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENGL_ES2_BIT 0x0004 /* EGL_RENDERABLE_TYPE mask bits */ +#define EGL_OPENGL_BIT 0x0008 /* EGL_RENDERABLE_TYPE mask bits */ + +/* QueryString targets */ +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_EXTENSIONS 0x3055 +#define EGL_CLIENT_APIS 0x308D + +/* QuerySurface / SurfaceAttrib / CreatePbufferSurface targets */ +#define EGL_HEIGHT 0x3056 +#define EGL_WIDTH 
0x3057 +#define EGL_LARGEST_PBUFFER 0x3058 +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_TARGET 0x3081 +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_VERTICAL_RESOLUTION 0x3091 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 + +/* EGL_RENDER_BUFFER values / BindTexImage / ReleaseTexImage buffer targets */ +#define EGL_BACK_BUFFER 0x3084 +#define EGL_SINGLE_BUFFER 0x3085 + +/* OpenVG color spaces */ +#define EGL_VG_COLORSPACE_sRGB 0x3089 /* EGL_VG_COLORSPACE value */ +#define EGL_VG_COLORSPACE_LINEAR 0x308A /* EGL_VG_COLORSPACE value */ + +/* OpenVG alpha formats */ +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B /* EGL_ALPHA_FORMAT value */ +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C /* EGL_ALPHA_FORMAT value */ + +/* Constant scale factor by which fractional display resolutions & + * aspect ratio are scaled when queried as integer values. 
+ */ +#define EGL_DISPLAY_SCALING 10000 + +/* Unknown display resolution/aspect ratio */ +#define EGL_UNKNOWN ((EGLint)-1) + +/* Back buffer swap behaviors */ +#define EGL_BUFFER_PRESERVED 0x3094 /* EGL_SWAP_BEHAVIOR value */ +#define EGL_BUFFER_DESTROYED 0x3095 /* EGL_SWAP_BEHAVIOR value */ + +/* CreatePbufferFromClientBuffer buffer types */ +#define EGL_OPENVG_IMAGE 0x3096 + +/* QueryContext targets */ +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 + +/* CreateContext attributes */ +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 + +/* Multisample resolution behaviors */ +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A /* EGL_MULTISAMPLE_RESOLVE value */ +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B /* EGL_MULTISAMPLE_RESOLVE value */ + +/* BindAPI/QueryAPI targets */ +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENGL_API 0x30A2 + +/* GetCurrentSurface targets */ +#define EGL_DRAW 0x3059 +#define EGL_READ 0x305A + +/* WaitNative engines */ +#define EGL_CORE_NATIVE_ENGINE 0x305B + +/* EGL 1.2 tokens renamed for consistency in EGL 1.3 */ +#define EGL_COLORSPACE EGL_VG_COLORSPACE +#define EGL_ALPHA_FORMAT EGL_VG_ALPHA_FORMAT +#define EGL_COLORSPACE_sRGB EGL_VG_COLORSPACE_sRGB +#define EGL_COLORSPACE_LINEAR EGL_VG_COLORSPACE_LINEAR +#define EGL_ALPHA_FORMAT_NONPRE EGL_VG_ALPHA_FORMAT_NONPRE +#define EGL_ALPHA_FORMAT_PRE EGL_VG_ALPHA_FORMAT_PRE + +/* EGL extensions must request enum blocks from the Khronos + * API Registrar, who maintains the enumerant registry. Submit + * a bug in Khronos Bugzilla against task "Registry". 
+ */ + + + +/* EGL Functions */ + +EGLAPI EGLint EGLAPIENTRY eglGetError(void); + +EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLAPI EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy); + +EGLAPI const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name); + +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, + EGLint config_size, EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, + EGLConfig *configs, EGLint config_size, + EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, + EGLint attribute, EGLint *value); + +EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, + EGLNativeWindowType win, + const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, + const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, + EGLNativePixmapType pixmap, + const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface, + EGLint attribute, EGLint *value); + +EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api); +EGLAPI EGLenum EGLAPIENTRY eglQueryAPI(void); + +EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient(void); + +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread(void); + +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer( + EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, + EGLConfig config, const EGLint *attrib_list); + +EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface, + EGLint attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface 
surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer); + + +EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval); + + +EGLAPI EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, + EGLContext share_context, + const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, + EGLSurface read, EGLContext ctx); + +EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext(void); +EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw); +EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx, + EGLint attribute, EGLint *value); + +EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL(void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, + EGLNativePixmapType target); + +/* This is a generic function pointer type, whose name indicates it must + * be cast to the proper type *and calling convention* before use. + */ +typedef void (*__eglMustCastToProperFunctionPointerType)(void); + +/* Now, define eglGetProcAddress using the generic function ptr. type */ +EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY + eglGetProcAddress(const char *procname); + +#ifdef __cplusplus +} +#endif + +#endif /* __egl_h_ */ diff --git a/amdocl/EGL/eglext.h b/amdocl/EGL/eglext.h new file mode 100644 index 0000000000..2317b0cf45 --- /dev/null +++ b/amdocl/EGL/eglext.h @@ -0,0 +1,645 @@ +#ifndef __eglext_h_ +#define __eglext_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 24350 $ on $Date: 2013-12-04 12:46:23 -0800 (Wed, 04 Dec 2013) $ +*/ + +#include <EGL/eglplatform.h> + +#define EGL_EGLEXT_VERSION 20131204 + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: _nomatch_^ + * Default extensions included: egl + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_KHR_cl_event +#define EGL_KHR_cl_event 1 +#define EGL_CL_EVENT_HANDLE_KHR 0x309C +#define EGL_SYNC_CL_EVENT_KHR 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE_KHR 0x30FF +#endif /* EGL_KHR_cl_event */ + +#ifndef EGL_KHR_cl_event2 +#define EGL_KHR_cl_event2 1 +typedef void *EGLSyncKHR; +typedef intptr_t EGLAttribKHR; +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#endif +#endif /* EGL_KHR_cl_event2 */ + +#ifndef EGL_KHR_client_get_all_proc_addresses +#define EGL_KHR_client_get_all_proc_addresses 1 +#endif /* EGL_KHR_client_get_all_proc_addresses */ + +#ifndef EGL_KHR_config_attribs +#define EGL_KHR_config_attribs 1 +#define EGL_CONFORMANT_KHR 0x3042 +#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR 0x0020 +#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR 0x0040 +#endif /* EGL_KHR_config_attribs */ + +#ifndef EGL_KHR_create_context +#define EGL_KHR_create_context 1 +#define EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098 +#define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB +#define EGL_CONTEXT_FLAGS_KHR 0x30FC +#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD +#define EGL_NO_RESET_NOTIFICATION_KHR 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_KHR 0x31BF +#define
EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002 +#define EGL_OPENGL_ES3_BIT_KHR 0x00000040 +#endif /* EGL_KHR_create_context */ + +#ifndef EGL_KHR_fence_sync +#define EGL_KHR_fence_sync 1 +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 +#define EGL_SYNC_CONDITION_KHR 0x30F8 +#define EGL_SYNC_FENCE_KHR 0x30F9 +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_fence_sync */ + +#ifndef EGL_KHR_get_all_proc_addresses +#define EGL_KHR_get_all_proc_addresses 1 +#endif /* EGL_KHR_get_all_proc_addresses */ + +#ifndef EGL_KHR_gl_renderbuffer_image +#define EGL_KHR_gl_renderbuffer_image 1 +#define EGL_GL_RENDERBUFFER_KHR 0x30B9 +#endif /* EGL_KHR_gl_renderbuffer_image */ + +#ifndef EGL_KHR_gl_texture_2D_image +#define EGL_KHR_gl_texture_2D_image 1 +#define EGL_GL_TEXTURE_2D_KHR 0x30B1 +#define EGL_GL_TEXTURE_LEVEL_KHR 0x30BC +#endif /* EGL_KHR_gl_texture_2D_image */ + +#ifndef EGL_KHR_gl_texture_3D_image +#define EGL_KHR_gl_texture_3D_image 1 +#define EGL_GL_TEXTURE_3D_KHR 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET_KHR 0x30BD +#endif /* EGL_KHR_gl_texture_3D_image */ + +#ifndef EGL_KHR_gl_texture_cubemap_image +#define EGL_KHR_gl_texture_cubemap_image 1 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 0x30B8 +#endif /* EGL_KHR_gl_texture_cubemap_image */ + +#ifndef EGL_KHR_image +#define EGL_KHR_image 1 +typedef void *EGLImageKHR; +#define EGL_NATIVE_PIXMAP_KHR 0x30B0 +#define EGL_NO_IMAGE_KHR ((EGLImageKHR)0) +typedef 
EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image); +#endif +#endif /* EGL_KHR_image */ + +#ifndef EGL_KHR_image_base +#define EGL_KHR_image_base 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 +#endif /* EGL_KHR_image_base */ + +#ifndef EGL_KHR_image_pixmap +#define EGL_KHR_image_pixmap 1 +#endif /* EGL_KHR_image_pixmap */ + +#ifndef EGL_KHR_lock_surface +#define EGL_KHR_lock_surface 1 +#define EGL_READ_SURFACE_BIT_KHR 0x0001 +#define EGL_WRITE_SURFACE_BIT_KHR 0x0002 +#define EGL_LOCK_SURFACE_BIT_KHR 0x0080 +#define EGL_OPTIMAL_FORMAT_BIT_KHR 0x0100 +#define EGL_MATCH_FORMAT_KHR 0x3043 +#define EGL_FORMAT_RGB_565_EXACT_KHR 0x30C0 +#define EGL_FORMAT_RGB_565_KHR 0x30C1 +#define EGL_FORMAT_RGBA_8888_EXACT_KHR 0x30C2 +#define EGL_FORMAT_RGBA_8888_KHR 0x30C3 +#define EGL_MAP_PRESERVE_PIXELS_KHR 0x30C4 +#define EGL_LOCK_USAGE_HINT_KHR 0x30C5 +#define EGL_BITMAP_POINTER_KHR 0x30C6 +#define EGL_BITMAP_PITCH_KHR 0x30C7 +#define EGL_BITMAP_ORIGIN_KHR 0x30C8 +#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR 0x30C9 +#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA +#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR 0x30CB +#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC +#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD +#define EGL_LOWER_LEFT_KHR 0x30CE +#define EGL_UPPER_LEFT_KHR 0x30CF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface); +#ifdef 
EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface); +#endif +#endif /* EGL_KHR_lock_surface */ + +#ifndef EGL_KHR_lock_surface2 +#define EGL_KHR_lock_surface2 1 +#define EGL_BITMAP_PIXEL_SIZE_KHR 0x3110 +#endif /* EGL_KHR_lock_surface2 */ + +#ifndef EGL_KHR_lock_surface3 +#define EGL_KHR_lock_surface3 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#endif +#endif /* EGL_KHR_lock_surface3 */ + +#ifndef EGL_KHR_reusable_sync +#define EGL_KHR_reusable_sync 1 +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_STATUS_KHR 0x30F1 +#define EGL_SIGNALED_KHR 0x30F2 +#define EGL_UNSIGNALED_KHR 0x30F3 +#define EGL_TIMEOUT_EXPIRED_KHR 0x30F5 +#define EGL_CONDITION_SATISFIED_KHR 0x30F6 +#define EGL_SYNC_TYPE_KHR 0x30F7 +#define EGL_SYNC_REUSABLE_KHR 0x30FA +#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 0x0001 +#define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull +#define EGL_NO_SYNC_KHR ((EGLSyncKHR)0) +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR 
EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_reusable_sync */ + +#ifndef EGL_KHR_stream +#define EGL_KHR_stream 1 +typedef void *EGLStreamKHR; +typedef khronos_uint64_t EGLuint64KHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_NO_STREAM_KHR ((EGLStreamKHR)0) +#define EGL_CONSUMER_LATENCY_USEC_KHR 0x3210 +#define EGL_PRODUCER_FRAME_KHR 0x3212 +#define EGL_CONSUMER_FRAME_KHR 0x3213 +#define EGL_STREAM_STATE_KHR 0x3214 +#define EGL_STREAM_STATE_CREATED_KHR 0x3215 +#define EGL_STREAM_STATE_CONNECTING_KHR 0x3216 +#define EGL_STREAM_STATE_EMPTY_KHR 0x3217 +#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218 +#define EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219 +#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A +#define EGL_BAD_STREAM_KHR 0x321B +#define EGL_BAD_STATE_KHR 0x321C +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLStreamKHR EGLAPIENTRY 
eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_stream */ + +#ifndef EGL_KHR_stream_consumer_gltexture +#define EGL_KHR_stream_consumer_gltexture 1 +#ifdef EGL_KHR_stream +#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_consumer_gltexture */ + +#ifndef EGL_KHR_stream_cross_process_fd +#define EGL_KHR_stream_cross_process_fd 1 +typedef int EGLNativeFileDescriptorKHR; +#ifdef EGL_KHR_stream +#define EGL_NO_FILE_DESCRIPTOR_KHR ((EGLNativeFileDescriptorKHR)(-1)) +typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR 
file_descriptor); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_cross_process_fd */ + +#ifndef EGL_KHR_stream_fifo +#define EGL_KHR_stream_fifo 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_FIFO_LENGTH_KHR 0x31FC +#define EGL_STREAM_TIME_NOW_KHR 0x31FD +#define EGL_STREAM_TIME_CONSUMER_KHR 0x31FE +#define EGL_STREAM_TIME_PRODUCER_KHR 0x31FF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_fifo */ + +#ifndef EGL_KHR_stream_producer_aldatalocator +#define EGL_KHR_stream_producer_aldatalocator 1 +#ifdef EGL_KHR_stream +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_aldatalocator */ + +#ifndef EGL_KHR_stream_producer_eglsurface +#define EGL_KHR_stream_producer_eglsurface 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_BIT_KHR 0x0800 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_eglsurface */ + +#ifndef EGL_KHR_surfaceless_context +#define EGL_KHR_surfaceless_context 1 +#endif /* EGL_KHR_surfaceless_context */ + +#ifndef EGL_KHR_vg_parent_image +#define EGL_KHR_vg_parent_image 1 +#define 
EGL_VG_PARENT_IMAGE_KHR 0x30BA +#endif /* EGL_KHR_vg_parent_image */ + +#ifndef EGL_KHR_wait_sync +#define EGL_KHR_wait_sync 1 +typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#endif +#endif /* EGL_KHR_wait_sync */ + +#ifndef EGL_ANDROID_blob_cache +#define EGL_ANDROID_blob_cache 1 +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#endif +#endif /* EGL_ANDROID_blob_cache */ + +#ifndef EGL_ANDROID_framebuffer_target +#define EGL_ANDROID_framebuffer_target 1 +#define EGL_FRAMEBUFFER_TARGET_ANDROID 0x3147 +#endif /* EGL_ANDROID_framebuffer_target */ + +#ifndef EGL_ANDROID_image_native_buffer +#define EGL_ANDROID_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_ANDROID 0x3140 +#endif /* EGL_ANDROID_image_native_buffer */ + +#ifndef EGL_ANDROID_native_fence_sync +#define EGL_ANDROID_native_fence_sync 1 +#define EGL_SYNC_NATIVE_FENCE_ANDROID 0x3144 +#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID 0x3145 +#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146 +#define EGL_NO_NATIVE_FENCE_FD_ANDROID -1 +typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync); +#endif +#endif /* EGL_ANDROID_native_fence_sync */ + 
+#ifndef EGL_ANDROID_recordable +#define EGL_ANDROID_recordable 1 +#define EGL_RECORDABLE_ANDROID 0x3142 +#endif /* EGL_ANDROID_recordable */ + +#ifndef EGL_ANGLE_d3d_share_handle_client_buffer +#define EGL_ANGLE_d3d_share_handle_client_buffer 1 +#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 +#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */ + +#ifndef EGL_ANGLE_query_surface_pointer +#define EGL_ANGLE_query_surface_pointer 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#endif +#endif /* EGL_ANGLE_query_surface_pointer */ + +#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle +#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 +#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */ + +#ifndef EGL_ARM_pixmap_multisample_discard +#define EGL_ARM_pixmap_multisample_discard 1 +#define EGL_DISCARD_SAMPLES_ARM 0x3286 +#endif /* EGL_ARM_pixmap_multisample_discard */ + +#ifndef EGL_EXT_buffer_age +#define EGL_EXT_buffer_age 1 +#define EGL_BUFFER_AGE_EXT 0x313D +#endif /* EGL_EXT_buffer_age */ + +#ifndef EGL_EXT_client_extensions +#define EGL_EXT_client_extensions 1 +#endif /* EGL_EXT_client_extensions */ + +#ifndef EGL_EXT_create_context_robustness +#define EGL_EXT_create_context_robustness 1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138 +#define EGL_NO_RESET_NOTIFICATION_EXT 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF +#endif /* EGL_EXT_create_context_robustness */ + +#ifndef EGL_EXT_image_dma_buf_import +#define EGL_EXT_image_dma_buf_import 1 +#define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define 
EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A +#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B +#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C +#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D +#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E +#define EGL_ITU_REC601_EXT 0x327F +#define EGL_ITU_REC709_EXT 0x3280 +#define EGL_ITU_REC2020_EXT 0x3281 +#define EGL_YUV_FULL_RANGE_EXT 0x3282 +#define EGL_YUV_NARROW_RANGE_EXT 0x3283 +#define EGL_YUV_CHROMA_SITING_0_EXT 0x3284 +#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285 +#endif /* EGL_EXT_image_dma_buf_import */ + +#ifndef EGL_EXT_multiview_window +#define EGL_EXT_multiview_window 1 +#define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134 +#endif /* EGL_EXT_multiview_window */ + +#ifndef EGL_EXT_platform_base +#define EGL_EXT_platform_base 1 +typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#endif +#endif /* EGL_EXT_platform_base */ + 
+#ifndef EGL_EXT_platform_wayland +#define EGL_EXT_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_EXT 0x31D8 +#endif /* EGL_EXT_platform_wayland */ + +#ifndef EGL_EXT_platform_x11 +#define EGL_EXT_platform_x11 1 +#define EGL_PLATFORM_X11_EXT 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6 +#endif /* EGL_EXT_platform_x11 */ + +#ifndef EGL_EXT_swap_buffers_with_damage +#define EGL_EXT_swap_buffers_with_damage 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_EXT_swap_buffers_with_damage */ + +#ifndef EGL_HI_clientpixmap +#define EGL_HI_clientpixmap 1 +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +#define EGL_CLIENT_PIXMAP_POINTER_HI 0x8F74 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#endif +#endif /* EGL_HI_clientpixmap */ + +#ifndef EGL_HI_colorformats +#define EGL_HI_colorformats 1 +#define EGL_COLOR_FORMAT_HI 0x8F70 +#define EGL_COLOR_RGB_HI 0x8F71 +#define EGL_COLOR_RGBA_HI 0x8F72 +#define EGL_COLOR_ARGB_HI 0x8F73 +#endif /* EGL_HI_colorformats */ + +#ifndef EGL_IMG_context_priority +#define EGL_IMG_context_priority 1 +#define EGL_CONTEXT_PRIORITY_LEVEL_IMG 0x3100 +#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101 +#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102 +#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103 +#endif /* EGL_IMG_context_priority */ + +#ifndef EGL_MESA_drm_image +#define EGL_MESA_drm_image 1 +#define EGL_DRM_BUFFER_FORMAT_MESA 0x31D0 +#define EGL_DRM_BUFFER_USE_MESA 0x31D1 +#define 
EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2 +#define EGL_DRM_BUFFER_MESA 0x31D3 +#define EGL_DRM_BUFFER_STRIDE_MESA 0x31D4 +#define EGL_DRM_BUFFER_USE_SCANOUT_MESA 0x00000001 +#define EGL_DRM_BUFFER_USE_SHARE_MESA 0x00000002 +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#endif +#endif /* EGL_MESA_drm_image */ + +#ifndef EGL_MESA_platform_gbm +#define EGL_MESA_platform_gbm 1 +#define EGL_PLATFORM_GBM_MESA 0x31D7 +#endif /* EGL_MESA_platform_gbm */ + +#ifndef EGL_NV_3dvision_surface +#define EGL_NV_3dvision_surface 1 +#define EGL_AUTO_STEREO_NV 0x3136 +#endif /* EGL_NV_3dvision_surface */ + +#ifndef EGL_NV_coverage_sample +#define EGL_NV_coverage_sample 1 +#define EGL_COVERAGE_BUFFERS_NV 0x30E0 +#define EGL_COVERAGE_SAMPLES_NV 0x30E1 +#endif /* EGL_NV_coverage_sample */ + +#ifndef EGL_NV_coverage_sample_resolve +#define EGL_NV_coverage_sample_resolve 1 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NV 0x3131 +#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133 +#endif /* EGL_NV_coverage_sample_resolve */ + +#ifndef EGL_NV_depth_nonlinear +#define EGL_NV_depth_nonlinear 1 +#define EGL_DEPTH_ENCODING_NV 0x30E2 +#define EGL_DEPTH_ENCODING_NONE_NV 0 +#define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3 +#endif /* EGL_NV_depth_nonlinear */ + +#ifndef EGL_NV_native_query +#define EGL_NV_native_query 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id); +typedef EGLBoolean (EGLAPIENTRYP 
PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#endif +#endif /* EGL_NV_native_query */ + +#ifndef EGL_NV_post_convert_rounding +#define EGL_NV_post_convert_rounding 1 +#endif /* EGL_NV_post_convert_rounding */ + +#ifndef EGL_NV_post_sub_buffer +#define EGL_NV_post_sub_buffer 1 +#define EGL_POST_SUB_BUFFER_SUPPORTED_NV 0x30BE +typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#endif +#endif /* EGL_NV_post_sub_buffer */ + +#ifndef EGL_NV_stream_sync +#define EGL_NV_stream_sync 1 +#define EGL_SYNC_NEW_FRAME_NV 0x321F +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#endif +#endif /* EGL_NV_stream_sync */ + +#ifndef EGL_NV_sync +#define EGL_NV_sync 1 +typedef void *EGLSyncNV; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6 +#define EGL_SYNC_STATUS_NV 0x30E7 +#define EGL_SIGNALED_NV 0x30E8 +#define EGL_UNSIGNALED_NV 0x30E9 +#define 
EGL_SYNC_FLUSH_COMMANDS_BIT_NV 0x0001 +#define EGL_FOREVER_NV 0xFFFFFFFFFFFFFFFFull +#define EGL_ALREADY_SIGNALED_NV 0x30EA +#define EGL_TIMEOUT_EXPIRED_NV 0x30EB +#define EGL_CONDITION_SATISFIED_NV 0x30EC +#define EGL_SYNC_TYPE_NV 0x30ED +#define EGL_SYNC_CONDITION_NV 0x30EE +#define EGL_SYNC_FENCE_NV 0x30EF +#define EGL_NO_SYNC_NV ((EGLSyncNV)0) +typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync); +EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_sync */ + +#ifndef EGL_NV_system_time +#define EGL_NV_system_time 1 +typedef khronos_utime_nanoseconds_t EGLuint64NV; +#ifdef KHRONOS_SUPPORT_INT64 +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void); +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void); +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); 
+#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_system_time */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/amdocl/EGL/eglplatform.h b/amdocl/EGL/eglplatform.h new file mode 100644 index 0000000000..3ab8844f09 --- /dev/null +++ b/amdocl/EGL/eglplatform.h @@ -0,0 +1,125 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2013 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 23432 $ on $Date: 2013-10-09 00:57:24 -0700 (Wed, 09 Oct 2013) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. 
Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. + * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. 
+ */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativeWindowType; +typedef void *EGLNativePixmapType; + +#elif defined(__ANDROID__) || defined(ANDROID) + +#include + +struct egl_native_pixmap_t; + +typedef struct ANativeWindow* EGLNativeWindowType; +typedef struct egl_native_pixmap_t* EGLNativePixmapType; +typedef void* EGLNativeDisplayType; + +#elif defined(__unix__) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. + */ +typedef khronos_int32_t EGLint; + +#endif /* __eglplatform_h */ diff --git a/amdocl/KHR/khrplatform.h b/amdocl/KHR/khrplatform.h new file mode 100644 index 0000000000..c9e6f17d34 --- /dev/null +++ b/amdocl/KHR/khrplatform.h @@ -0,0 +1,282 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $ + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by sending them to the public Khronos Bugzilla + * (http://khronos.org/bugzilla) by filing a bug against product + * "Khronos (general)" component "Registry". + * + * A predefined template which fills in some of the bug fields can be + * reached using http://tinyurl.com/khrplatform-h-bugreport, but you + * must create a Bugzilla login first. 
+ * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. 
+ * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(_WIN32) && !defined(__SCITECH_SNAP__) +# define KHRONOS_APICALL __declspec(dllimport) +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. 
+ */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t 
khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. + */ +#ifdef _WIN64 +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; +#endif + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. 
Therefore + * comparisons should not be made against KHRONOS_TRUE. + */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff --git a/amdocl/cl_common.hpp b/amdocl/cl_common.hpp new file mode 100644 index 0000000000..a88a06f498 --- /dev/null +++ b/amdocl/cl_common.hpp @@ -0,0 +1,301 @@ +/* Copyright (c) 2008-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_COMMON_HPP_ +#define CL_COMMON_HPP_ + +#include "top.hpp" +#include "platform/runtime.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" +#include "thread/thread.hpp" +#include "platform/commandqueue.hpp" + +#include +#include + +//! 
\cond ignore +namespace amd { + +template +class NotNullWrapper +{ +private: + T* const ptrOrNull_; + +protected: + explicit NotNullWrapper(T* ptrOrNull) + : ptrOrNull_(ptrOrNull) + { } + +public: + void operator = (T value) const + { + if (ptrOrNull_ != NULL) { + *ptrOrNull_ = value; + } + } +}; + +template +class NotNullReference : protected NotNullWrapper +{ +public: + explicit NotNullReference(T* ptrOrNull) + : NotNullWrapper(ptrOrNull) + { } + + const NotNullWrapper& operator * () const { return *this; } +}; + +} // namespace amd + +template +inline amd::NotNullReference +not_null(T* ptrOrNull) +{ + return amd::NotNullReference(ptrOrNull); +} + +#define CL_CHECK_THREAD(thread) \ + (thread != NULL || ((thread = new amd::HostThread()) != NULL \ + && thread == amd::Thread::current())) + +#define RUNTIME_ENTRY_RET(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; \ + return (ret) 0; \ + } + +#define RUNTIME_ENTRY_RET_NOERRCODE(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return (ret) 0; \ + } + +#define RUNTIME_ENTRY(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return CL_OUT_OF_HOST_MEMORY; \ + } + +#define RUNTIME_ENTRY_VOID(ret, func, args) \ +CL_API_ENTRY ret CL_API_CALL \ +func args \ +{ \ + amd::Thread* thread = amd::Thread::current(); \ + if (!CL_CHECK_THREAD(thread)) { \ + return; \ + } + +#define RUNTIME_EXIT \ + /* FIXME_lmoriche: we should check to thread->lastError here! */ \ +} + +//! 
Helper function to check "properties" parameter in various functions +int checkContextProperties( + const cl_context_properties *properties, + bool* offlineDevices); + +namespace amd { + +namespace detail { + +template +struct ParamInfo +{ + static inline std::pair get(const T& param) { + return std::pair(¶m, sizeof(T)); + } +}; + +template <> +struct ParamInfo +{ + static inline std::pair get(const char* param) { + return std::pair(param, strlen(param) + 1); + } +}; + +template +struct ParamInfo +{ + static inline std::pair get(const char* param) { + return std::pair(param, strlen(param) + 1); + } +}; + +} // namespace detail + +template +static inline cl_int +clGetInfo( + T& field, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) +{ + const void *valuePtr; + size_t valueSize; + + std::tie(valuePtr, valueSize) + = detail::ParamInfo::type>::get(field); + + *not_null(param_value_size_ret) = valueSize; + + cl_int ret = CL_SUCCESS; + if (param_value != NULL && param_value_size < valueSize) { + if (!std::is_pointer() || !std::is_same::type>::type, char>()) { + return CL_INVALID_VALUE; + } + // For char* and char[] params, we will at least fill up to + // param_value_size, then return an error. + valueSize = param_value_size; + static_cast(param_value)[--valueSize] = '\0'; + ret = CL_INVALID_VALUE; + } + + if (param_value != NULL) { + ::memcpy(param_value, valuePtr, valueSize); + if (param_value_size > valueSize) { + ::memset(static_cast

(param_value) + valueSize, + '\0', param_value_size - valueSize); + } + } + + return ret; +} + +static inline cl_int +clSetEventWaitList( + Command::EventWaitList& eventWaitList, + const amd::HostQueue& hostQueue, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list) +{ + if ((num_events_in_wait_list == 0 && event_wait_list != NULL) + || (num_events_in_wait_list != 0 && event_wait_list == NULL)) { + return CL_INVALID_EVENT_WAIT_LIST; + } + + while (num_events_in_wait_list-- > 0) { + cl_event event = *event_wait_list++; + Event* amdEvent = as_amd(event); + if (!is_valid(event)) { + return CL_INVALID_EVENT_WAIT_LIST; + } + if (&hostQueue.context() != &amdEvent->context()) { + return CL_INVALID_CONTEXT; + } + if ((amdEvent->command().queue() != &hostQueue) && !amdEvent->notifyCmdQueue()) { + return CL_INVALID_EVENT_WAIT_LIST; + } + eventWaitList.push_back(amdEvent); + } + return CL_SUCCESS; +} + +//! Common function declarations for CL-external graphics API interop +cl_int clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, + cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event, cl_command_type cmd_type); +cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, + cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event, cl_command_type cmd_type); + +// This may need moving somewhere tidier... 
+ +struct PlatformIDS { const struct KHRicdVendorDispatchRec* dispatch_; }; +class PlatformID { +public: + static PlatformIDS Platform; +}; +#define AMD_PLATFORM (reinterpret_cast(&amd::PlatformID::Platform)) + +} // namespace amd + +extern "C" { + +extern CL_API_ENTRY cl_key_amd CL_API_CALL +clCreateKeyAMD( + cl_platform_id platform, + void (CL_CALLBACK * destructor)( void * ), + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clObjectGetValueForKeyAMD( + void * object, + cl_key_amd key, + void ** ret_val); + +extern CL_API_ENTRY cl_int CL_API_CALL +clObjectSetValueForKeyAMD( + void * object, + cl_key_amd key, + void * value); + +#if defined(CL_VERSION_1_1) +extern CL_API_ENTRY cl_int CL_API_CALL +clSetCommandQueueProperty( + cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties *old_properties) CL_API_SUFFIX__VERSION_1_0; +#endif // CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clConvertImageAMD( + cl_context context, + cl_mem image, + const cl_image_format * image_format, + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferFromImageAMD( + cl_context context, + cl_mem image, + cl_int * errcode_ret); + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithAssemblyAMD( + cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret); + +} // extern "C" + +//! \endcond + +#endif /*CL_COMMON_HPP_*/ diff --git a/amdocl/cl_debugger_amd.h b/amdocl/cl_debugger_amd.h new file mode 100644 index 0000000000..1e9fe29e3a --- /dev/null +++ b/amdocl/cl_debugger_amd.h @@ -0,0 +1,694 @@ +/* Copyright (c) 2014-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef __CL_DEBUGGER_AMD_H +#define __CL_DEBUGGER_AMD_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +/****************************************** +* Private AMD extension cl_dbg * +******************************************/ +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +#define CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD -80 +#define CL_DEBUGGER_REGISTER_FAILURE_AMD -81 +#define CL_TRAP_HANDLER_NOT_DEFINED_AMD -82 +#define CL_EVENT_TIMEOUT_AMD -83 + + +typedef uintptr_t cl_dbg_event_amd; //! debug event + +/*! \brief Trap Handler Type + * + * The trap handler for each support type. + */ +enum cl_dbg_trap_type_amd { + CL_DBG_DEBUG_TRAP = 0, //! HW debug + CL_DBG_MAX_TRAP +}; + +/*! \brief Wave actions used to control the wave execution on the hardware + * + * The wave action enumerations are used to specify the desired + * behavior when calling the wave control function. 
Overall, there are + * five types of operations that can be specified. + */ +enum cl_dbg_waves_action_amd { + CL_DBG_WAVES_DONT_USE_ZERO = 0, //! NOT USED + CL_DBG_WAVES_HALT = 1, //! halt wave + CL_DBG_WAVES_RESUME = 2, //! resume wave + CL_DBG_WAVES_KILL = 3, //! kill wave + CL_DBG_WAVES_DEBUG = 4, //! debug wave + CL_DBG_WAVES_TRAP = 5, //! trap + CL_DBG_WAVES_MAX +}; + +/*! \brief Host actions when encountering an exception in the kernel. + * + * The host action enumeration is used to specify the desired host + * response in the event that a device kernel exception is encountered. + */ +enum cl_dbg_host_action_amd { + CL_DBG_HOST_IGNORE = 1, //! ignore the kernel exception + CL_DBG_HOST_EXIT = 2, //! exit the host application on a kernel exception + CL_DBG_HOST_NOTIFY = 4 //! report the kernel exception +}; + +/*! \brief Mode of the wave action when calling the wave control function + * + * The wave mode enumerations are used to specify the desired + * broadcast level when calling the wave control function. + */ +enum cl_dbg_wave_mode_amd { + CL_DBG_WAVEMODE_SINGLE = 0, //! send command to single wave + CL_DBG_WAVEMODE_BROADCAST = 2, //! send command to wave with matching VMID + CL_DBG_WAVEMODE_BROADCAST_CU = 3, //! send command to wave with matching VMID with specific CU + CL_DBG_WAVEMODE_MAX +}; + +/*! \brief Enumeration of address watch mode + * + * This enumeration indicates the different modes of address watch. + */ +enum cl_dbg_address_watch_mode_amd { + CL_DBG_ADDR_WATCH_MODE_READ = 0, //! Read operations only + CL_DBG_ADDR_WATCH_MODE_NONREAD = 1, //! Write or Atomic operations only + CL_DBG_ADDR_WATCH_MODE_ATOMIC = 2, //! Atomic Operations only + CL_DBG_ADDR_WATCH_MODE_ALL = 3, //! Read, Write or Atomic operations + CL_DBG_ADDR_WATCH_MODE_MAX //! Number of address watch modes +}; + +/*! 
\brief Dispatch exception policy descriptor + * + * The dispatch exception policy descriptor is used to define the + * expected exception policy in the event an exception is encountered + * on the associated dispatch. + */ +typedef struct _cl_dbg_exception_policy_amd { + cl_uint exceptionMask; //! exception mask + cl_dbg_waves_action_amd waveAction; //! wave action + cl_dbg_host_action_amd hostAction; //! host action + cl_dbg_wave_mode_amd waveMode; //! wave mode +} cl_dbg_exception_policy_amd; + +/*! \brief Kernel execution mode + * + * This structure is used to control the kernel execution mode. The + * following aspects are included in this structure: + * 1. Regular execution or debug mode (0: regular execution (default), + * 1: debug mode) + * 2. SQ debugger mode on/off + * 3. Disable L1 scalar cache (0: enable (default), 1: disable) + * 4. Disable L1 vector cache (0: enable (default), 1: disable) + * 5. Disable L2 cache (0: enable (default), 1: disable) + * 6. Num of CUs reserved for display (0 (default), 7: max) + */ +typedef struct _cl_dbg_kernel_exec_mode_amd { + union { + struct { + cl_uint monitorMode : 1; + cl_uint gpuSingleStepMode : 1; + cl_uint disableL1Scalar : 1; + cl_uint disableL1Vector : 1; + cl_uint disableL2Cache : 1; + cl_uint reservedCuNum : 3; + cl_uint reserved : 24; + }; + cl_uint ui32All; + }; +} cl_dbg_kernel_exec_mode_amd; + +/*! \brief GPU cache mask + * + * This structure is used to specify the GPU cache to be flushed/invalidated + */ +typedef struct _cl_dbg_gpu_cache_mask_amd { + union { + struct { + cl_uint sqICache : 1; //! instruction cache + cl_uint sqKCache : 1; //! data cache + cl_uint tcL1 : 1; //! tcL1 cache + cl_uint tcL2 : 1; //! tcL2 cache + cl_uint reserved : 28; + }; + cl_uint ui32All; + }; +} cl_dbg_gpu_cache_mask_amd; + +/*! 
\brief Dispatch Debug Info + * + * This structure is used to store the scratch and global memory descriptors + */ +typedef struct _cl_dispatch_debug_info_amd { + cl_uint scratchMemoryDescriptor[4]; //! Scratch memory descriptors + cl_uint globalMemoryDescriptor[4]; //! Global memory descriptors +} cl_dispatch_debug_info_amd; + +/*! \brief AQL Packet Info + * + * This structure is used to store AQL packet information for kernel dispatch + */ +typedef struct _cl_aql_packet_info_amd { + cl_uint trapReservedVgprIndex; //! VGPR index reserved for trap + //! value is -1 when kernel was not compiled + //! in debug mode. + cl_uint scratchBufferWaveOffset; //! scratch buffer wave offset + //! value is -1 when kernel was not compiled + //! in debug mode or scratch buffer is not enabled + void* pointerToIsaBuffer; //! Pointer to buffer containing ISA + size_t sizeOfIsaBuffer; //! Size of the ISA buffer + + cl_uint numberOfVgprs; //! Number of VGPRs used by the kernel + cl_uint numberOfSgprs; //! Number of SGPRs used by the kernel + size_t sizeOfStaticGroupMemory; //! Static local memory used by the kernel +} cl_aql_packet_info_amd; + +/*! \brief Wave address + * + * This structure specifies the wave for the SQ control command + */ +typedef struct _cl_dbg_wave_addr_amd { + cl_uint shaderEngine : 2; //! Shader engine + cl_uint shaderArray : 1; //! Shader array + cl_uint computeUnit : 4; //! Compute unit + cl_uint simd : 2; //! SIMD id + cl_uint wave : 4; //! Wave id + cl_uint vmid : 4; //! VMID + cl_uint reserved : 15; + +} cl_dbg_wave_addr_amd; + +/*! \brief Pre-dispatch call back function signature + * + * This is the signature of the call back function before the kernel + * dispatch. The call back function is to indicate the start of the + * kernel launch. It is used by the debugger. + */ +typedef void* (*cl_PreDispatchCallBackFunctionAMD)(cl_device_id device, void* ocl_event_handle, + const void* aql_packet, void* acl_binary, + void* user_args); + +/*! 
\brief Post-dispatch call back function signature + * + * This is the signature of the call back function after the kernel + * dispatch. The call back function is to indicate the completion of + * the kernel launch. It is used by the debugger. + */ +typedef void* (*cl_PostDispatchCallBackFunctionAMD)(cl_device_id device, cl_ulong event, + void* user_args); + +/*! \brief Set up the dispatch call back function pointers + * + * \param device specifies the device to be used + * + * \param preDispatchFunction is the function to be called before dispatching the kernel + * + * \param postDispatchFunction is the function to be called after kernel execution + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackFunctionsAMD( + cl_device_id /* device */, cl_PreDispatchCallBackFunctionAMD /* preDispatchFunction */, + cl_PostDispatchCallBackFunctionAMD /* postDispatchFunction */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Set up the arguments of the dispatch call back function + * + * \param device specifies the device to be used + * + * \param preDispatchArgs is the arguments for the pre-dispatch callback function + * + * \param postDispatchArgs is the arguments for the post-dispatch callback function + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD(cl_device_id /* device */, + void* /* preDispatchArgs */, + void* /* postDispatchArgs */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Invalidate all cache on the device.
+ * + * \param device specifies the device to be used + * + * \param mask is the mask to specify which cache to be flush/invalidate + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD(cl_device_id /* device */, + cl_dbg_gpu_cache_mask_amd /* mask */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Set up an exception policy in the trap handler object + * + * \param device specifies the device to be used + * + * \param policy specifies the exception policy, which includes the exception mask, + * wave action, host action, wave mode. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the policy is not specified (NULL) + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetExceptionPolicyAMD( + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Get the exception policy in the trap handler object + * + * \param device specifies the device to be used + * + * \param policy is a pointer to the memory where the policy is returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the policy storage is not specified + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetExceptionPolicyAMD( + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Set up the kernel execution mode in the trap handler object + * + * \param device specifies the device to be used + * + * \param mode specifies the kernel execution mode, which indicates whether single + * step mode is used and how many CUs are reserved. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the mode is not specified, i.e., has a NULL value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetKernelExecutionModeAMD( + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Get the kernel execution mode in the trap handler object + * + * \param device specifies the device to be used + * + * \param mode is a pointer to the memory where the execution mode is returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the mode storage is not specified, i.e., has a NULL value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelExecutionModeAMD( + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Create a debug event + * + * \param device specifies the device to be used + * + * \param autoReset is the auto reset flag + * + * \param pDebugEvent returns the debug event to be used for exception notification + * + * \param pEventId is the event ID, which is not used at this moment + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_OUT_OF_RESOURCES if fails to create the event + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD(cl_device_id /* device */, + bool /* autoReset */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Wait for a debug event to be signaled + * + * \param device specifies the device to be used + * + * \param pDebugEvent is the debug event to be waited for + * + * \param pEventId is the event ID, which is not used at this moment + * + * \param timeOut is the duration for waiting + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_EVENT_TIMEOUT_AMD if timeout occurs + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD(cl_device_id /* device */, + cl_dbg_event_amd /* pDebugEvent */, + cl_uint /* pEventId */, + cl_uint /* timeOut */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Destroy a debug event + * + * \param device specifies the device to be used + * + * \param pDebugEvent is the debug event to be destroyed + * + * \param pEventId is the event ID, which is not used at this moment + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pDebugEvent value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD(cl_device_id /* device */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Register the debugger on a device + * + * \param context specifies the context for the debugger + * + * \param device specifies the device to be used + * + * \param pMessageStorage specifies the memory for trap message passing between KMD and OCL runtime + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_CONTEXT if the context is not valid + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the pMessageStorage value is NULL + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_OUT_OF_RESOURCES if a host queue cannot be created for the debugger + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgRegisterDebuggerAMD( + cl_context /* context */, cl_device_id /* device */, volatile void* /* pMessageStorage */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Unregister the debugger on a device + * + * \param device specifies the device to be used + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD(cl_device_id /* device */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Setup the pointer of the acl_binary to be used by the debugger + * + * \param device specifies the device to be used + * + * \param aclBinary specifies the ACL binary to be used + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the aclBinary is not provided + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD(cl_device_id /* device */, + void* /* aclBinary */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Control the execution of wavefront on the GPU + * + * \param device specifies the device to be used + * + * \param action specifies the wave action - halt, resume, kill, debug + * + * \param mode specifies the wave mode + * + * \param trapID specifies the trap ID, which should be 0x7 + * + * \param waveAddress specifies the wave address for the wave control + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the waveMsg is not provided, invalid action or mode value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD(cl_device_id /* device */, + cl_dbg_waves_action_amd /* action */, + cl_dbg_wave_mode_amd /* mode */, + cl_uint /* trapId */, + cl_dbg_wave_addr_amd /* waveAddress */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Set watch points on memory address ranges to generate exception events + * + * \param device specifies the device to be used + * + * \param numWatchPoints specifies the number of watch points + * + * \param watchMode is the array of watch mode for the watch points + * + * \param watchAddress is the array of watch address for the watch points + * + * \param watchMask is the array of mask for the watch points + * + * \param watchEvent is the array of event for the watch points + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the number of points <= 0, or other parameters is not specified + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgAddressWatchAMD( + cl_device_id /* device */, cl_uint /* numWatchPoints */, + cl_dbg_address_watch_mode_amd* /* watchMode */, void** /* watchAddress */, + cl_ulong* /* watchMask */, 
cl_dbg_event_amd* /* watchEvent */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Get the packet information for kernel execution + * + * \param device specifies the device to be used + * + * \param aqlCodeInfo specifies the kernel code and its size + * + * \param packetInfo points to the memory for the packet information to be returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetAqlPacketInfoAMD( + cl_device_id /* device */, const void* /* aqlCodeInfo */, + cl_aql_packet_info_amd* /* packetInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Get the dispatch debug information + * + * \param device specifies the device to be used + * + * \param debugInfo points to the memory for the debug information to be returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetDispatchDebugInfoAMD( + cl_device_id /* device */, cl_dispatch_debug_info_amd* /* debugInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Map the video memory for the kernel code to allow host access + * + * \param device specifies the device to be used + * + * \param aqlCodeAddress is the memory points to the returned host memory address for the kernel + * code + * + * \param aqlCodeSize returns the size of the kernel code + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD(cl_device_id /* device */, + void* /* aqlCodeInfo */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! \brief Unmap the video memory for the kernel code + * + * \param device specifies the device to be used (no needed, just to be consistent) + * + * \param aqlCodeAddress is the memory points to the mapped memory address for the kernel code + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD(cl_device_id /* device */, + cl_ulong* /* aqlCodeAddress */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Map the shader scratch ring's video memory to allow CPU access + * + * \param device specifies the device to be used + * + * \param scratchRingAddr is the memory points to the returned host memory address for scratch + * ring + * + * \param scratchRingSize returns the size of the scratch ring + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */, + cl_uint* /* scratchRingSize */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Unmap the shader scratch ring's video memory + * + * \param device specifies the device to be used (no needed, just to be consistent) + * + * \param scratchRingAddr is the memory points to the mapped memory address for scratch ring + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! 
\brief Get the memory object associated with the kernel parameter + * + * \param device specifies the device to be used + * + * \param paramIdx is the index of the kernel argument + * + * \param paramMem is a pointer to the memory object associated with the kernel argument to be returned + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if the paramIdx is less than zero, or the paramMem has NULL value + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + * - CL_INVALID_KERNEL_ARGS if it fails to get the memory object for the kernel argument + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD(cl_device_id /* device */, + cl_uint /* paramIdx */, + cl_mem* /* paramMem */ + ) CL_API_SUFFIX__VERSION_2_0; + +/*! \brief Set value of a global memory object + * + * \param device specifies the device to be used + * + * \param memObject is the memory object handle to be assigned the value specified in srcMem. + * + * \param offset is the offset into the memory object + * + * \param srcMem points to the memory which contains the values to be assigned to the memory + * + * \param size is the size (in bytes) of the srcMem + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if memObject or srcMem has NULL value, size <= 0 or offset < 0 + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD(cl_device_id /* device */, + cl_mem /* memObject */, + cl_uint /* offset */, + void* /* srcMem */, + cl_uint /* size */ + ) CL_API_SUFFIX__VERSION_2_0; + + +/*! 
\brief Install the trap handler of a given type + * + * \param device specifies the device to be used + * + * \param trapType is the type of trap handler + * + * \param trapHandler is the pointer of trap handler (TBA) + * + * \param trapBuffer is the pointer of trap handler buffer (TMA) + * + * \param trapHandlerSize size (in bytes) of the trap handler + * + * \param trapBufferSize size (in bytes) of the trap handler buffer + * + * \return One of the following values: + * - CL_SUCCESS if the event occurs before the timeout + * - CL_INVALID_DEVICE if the device is not valid + * - CL_INVALID_VALUE if trapHandler is NULL or trapHandlerSize <= 0 + * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager + */ +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgInstallTrapAMD(cl_device_id /* device */, + cl_dbg_trap_type_amd /* trapType */, + cl_mem /* trapHandler */, + cl_mem /* trapBuffer */ + ) CL_API_SUFFIX__VERSION_2_0; + + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_DEBUGGER_AMD_H*/ diff --git a/amdocl/cl_icd.cpp b/amdocl/cl_icd.cpp new file mode 100644 index 0000000000..ec2cb48d7d --- /dev/null +++ b/amdocl/cl_icd.cpp @@ -0,0 +1,293 @@ +/* Copyright (c) 2008-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "cl_common.hpp" +#include "vdi_common.hpp" +#ifdef _WIN32 +#include +#include "cl_d3d9_amd.hpp" +#include "cl_d3d10_amd.hpp" +#include "cl_d3d11_amd.hpp" +#endif //_WIN32 + +#include + +#include + +amd::PlatformIDS amd::PlatformID::Platform = //{ NULL }; + {amd::ICDDispatchedObject::icdVendorDispatch_}; + +static cl_int CL_API_CALL icdGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + return clGetPlatformInfo(NULL, param_name, param_value_size, param_value, param_value_size_ret); +} + +static cl_int CL_API_CALL icdGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, + cl_uint num_entries, cl_device_id* devices, + cl_uint* num_devices) { + return clGetDeviceIDs(NULL, device_type, num_entries, devices, num_devices); +} + +static cl_int CL_API_CALL icdGetDeviceInfo(cl_device_id device, cl_device_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + if (param_name == CL_DEVICE_PLATFORM) { + // Return the ICD platform instead of the default NULL platform. 
+ cl_platform_id platform = reinterpret_cast(&amd::PlatformID::Platform); + return amd::clGetInfo(platform, param_value_size, param_value, param_value_size_ret); + } + + return clGetDeviceInfo(device, param_name, param_value_size, param_value, param_value_size_ret); +} + +cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = { + {NULL /* should not get called */, icdGetPlatformInfo, icdGetDeviceIDs, icdGetDeviceInfo, + clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, + clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, + clSetCommandQueueProperty, clCreateBuffer, clCreateImage2D, clCreateImage3D, clRetainMemObject, + clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, + clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, + clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, + clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, clCreateKernel, + clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, + clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, + clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, + clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, + clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapBuffer, clEnqueueMapImage, + clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, + clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, + clCreateFromGLBuffer, clCreateFromGLTexture2D, clCreateFromGLTexture3D, + clCreateFromGLRenderbuffer, clGetGLObjectInfo, clGetGLTextureInfo, clEnqueueAcquireGLObjects, + clEnqueueReleaseGLObjects, clGetGLContextInfoKHR, + WINDOWS_SWITCH(clGetDeviceIDsFromD3D10KHR, NULL), + 
WINDOWS_SWITCH(clCreateFromD3D10BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture3DKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D10ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D10ObjectsKHR, NULL), clSetEventCallback, clCreateSubBuffer, + clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, + clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, + NULL, NULL, NULL, clCreateEventFromGLsyncKHR, + + /* OpenCL 1.2*/ + clCreateSubDevices, clRetainDevice, clReleaseDevice, clCreateImage, + clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram, clUnloadPlatformCompiler, + clGetKernelArgInfo, clEnqueueFillBuffer, clEnqueueFillImage, clEnqueueMigrateMemObjects, + clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, + clGetExtensionFunctionAddressForPlatform, clCreateFromGLTexture, + + WINDOWS_SWITCH(clGetDeviceIDsFromD3D11KHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture3DKHR, NULL), + WINDOWS_SWITCH(clCreateFromDX9MediaSurfaceKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D11ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D11ObjectsKHR, NULL), + + WINDOWS_SWITCH(clGetDeviceIDsFromDX9MediaAdapterKHR, + NULL), // KHRpfn_clGetDeviceIDsFromDX9MediaAdapterKHR + // clGetDeviceIDsFromDX9MediaAdapterKHR; + WINDOWS_SWITCH( + clEnqueueAcquireDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueAcquireDX9MediaSurfacesKHR; + WINDOWS_SWITCH( + clEnqueueReleaseDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueReleaseDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR; + + NULL, + NULL, NULL, NULL, + + clCreateCommandQueueWithProperties, clCreatePipe, clGetPipeInfo, clSVMAlloc, clSVMFree, + clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill, clEnqueueSVMMap, clEnqueueSVMUnmap, + 
clCreateSamplerWithProperties, clSetKernelArgSVMPointer, clSetKernelExecInfo, + clGetKernelSubGroupInfo, + clCloneKernel, + clCreateProgramWithIL, + clEnqueueSVMMigrateMem, + clGetDeviceAndHostTimer, + clGetHostTimer, + clGetKernelSubGroupInfo, + clSetDefaultDeviceCommandQueue, + + clSetProgramReleaseCallback, + clSetProgramSpecializationConstant }}; + +#if defined(ATI_OS_WIN) +#include + +#pragma comment(lib, "shlwapi.lib") + +static bool ShouldLoadPlatform() { + // Get the OpenCL ICD registry values + HKEY platformsKey = NULL; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Khronos\\OpenCL\\Vendors", 0, KEY_READ, + &platformsKey) != ERROR_SUCCESS) + return true; + + std::vector registryValues; + DWORD dwIndex = 0; + while (true) { + char cszLibraryName[1024] = {0}; + DWORD dwLibraryNameSize = sizeof(cszLibraryName); + DWORD dwLibraryNameType = 0; + DWORD dwValue = 0; + DWORD dwValueSize = sizeof(dwValue); + + if (RegEnumValueA(platformsKey, dwIndex++, cszLibraryName, &dwLibraryNameSize, NULL, + &dwLibraryNameType, (LPBYTE)&dwValue, &dwValueSize) != ERROR_SUCCESS) + break; + // Require that the value be a DWORD and equal zero + if (dwLibraryNameType != REG_DWORD || dwValue != 0) { + continue; + } + registryValues.push_back(cszLibraryName); + } + RegCloseKey(platformsKey); + + HMODULE hm = NULL; + if (!GetModuleHandleExA( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR)&ShouldLoadPlatform, &hm)) + return true; + + char cszDllPath[1024] = {0}; + if (!GetModuleFileNameA(hm, cszDllPath, sizeof(cszDllPath))) return true; + + // If we are loaded from the DriverStore, then there should be a registry + // value matching our current module absolute path. 
+ if (std::find(registryValues.begin(), registryValues.end(), cszDllPath) == registryValues.end()) + return true; + + LPSTR cszFileName; + char buffer[1024] = {0}; + if (!GetFullPathNameA(cszDllPath, sizeof(buffer), buffer, &cszFileName)) return true; + + // We found an absolute path in the registry that matched this DLL, now + // check if there is also an entry with the same filename. + if (std::find(registryValues.begin(), registryValues.end(), cszFileName) == registryValues.end()) + return true; + + // Lastly, check if there is a DLL with the same name in the System folder. + char cszSystemPath[1024] = {0}; +#if defined(ATI_BITS_32) + if (!GetSystemWow64DirectoryA(cszSystemPath, sizeof(cszSystemPath))) +#endif // defined(ATI_BITS_32) + if (!GetSystemDirectoryA(cszSystemPath, sizeof(cszSystemPath))) return true; + + std::string systemDllPath; + systemDllPath.append(cszSystemPath).append("\\").append(cszFileName); + if (!PathFileExistsA(systemDllPath.c_str())) { + return true; + } + + // If we get here, then all 3 conditions are true: + // - An entry in the registry with an absolute path matches the current DLL + // - An entry in the registry with a relative path matches the current DLL + // - A DLL with the same name was found in the system directory + // + // We should not load this platform! + + return false; +} + +#else + +#include + +// If there is only one platform, load it. 
+// If there is more than one platform, only load platforms that have visible devices +// If all platforms have no devices available, only load the PAL platform. Returns true if this platform should be loaded. +static bool ShouldLoadPlatform() { + bool shouldLoad = true; + // The runtime has to be initialized before its GPU devices can be counted. + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + const int numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, false); + + void *otherPlatform = nullptr; + if (amd::IS_LEGACY) { + otherPlatform = dlopen("libamdocl64.so", RTLD_LAZY); + if (otherPlatform != nullptr) { // Present platform exists + shouldLoad = numDevices > 0; + } + } else { + otherPlatform = dlopen("libamdocl-orca64.so", RTLD_LAZY); + if (otherPlatform != nullptr) { // Legacy platform exists + // gcc4.8 doesn't support casting void* to a function pointer + // Work around this by creating a typedef until we upgrade the compiler + typedef void*(*clGetFunctionAddress_t)(const char *); + typedef cl_int(*clIcdGetPlatformIDs_t)(cl_uint, cl_platform_id *, cl_uint *); + // Both lookups can come back null; a null entry point must never be called. + clGetFunctionAddress_t legacyGetFunctionAddress = + reinterpret_cast<clGetFunctionAddress_t>(dlsym(otherPlatform, "clGetExtensionFunctionAddress")); + clIcdGetPlatformIDs_t legacyGetPlatformIDs = (legacyGetFunctionAddress == nullptr) ? nullptr : + reinterpret_cast<clIcdGetPlatformIDs_t>(legacyGetFunctionAddress("clIcdGetPlatformIDsKHR")); + // Without the ICD entry point the legacy library is treated as exposing no platform. + cl_uint numLegacyPlatforms = 0; + if (legacyGetPlatformIDs != nullptr) legacyGetPlatformIDs(0, nullptr, &numLegacyPlatforms); + + shouldLoad = (numDevices > 0) || (numLegacyPlatforms == 0); + } + } + + if (otherPlatform != nullptr) { + dlclose(otherPlatform); + } + + return shouldLoad; +} + +#endif // defined(ATI_OS_WIN) + +CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms) { + if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) || + (num_entries == 0 && platforms != NULL)) { + return CL_INVALID_VALUE; + } + + static bool shouldLoad = true; + + static std::once_flag initOnce; + std::call_once(initOnce, [](){ shouldLoad = ShouldLoadPlatform(); }); + + if (!shouldLoad) { + 
*not_null(num_platforms) = 0; + return CL_SUCCESS; + } + + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + + if (num_platforms != NULL && platforms == NULL) { + *num_platforms = 1; + return CL_SUCCESS; + } + + assert(platforms != NULL && "check the code above"); + *platforms = reinterpret_cast(&amd::PlatformID::Platform); + + *not_null(num_platforms) = 1; + return CL_SUCCESS; +} diff --git a/amdocl/cl_icd_amd.h b/amdocl/cl_icd_amd.h new file mode 100644 index 0000000000..69408e75ac --- /dev/null +++ b/amdocl/cl_icd_amd.h @@ -0,0 +1,739 @@ +/******************************************************************************* + * Copyright (c) 2008-2010 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#ifndef __OPENCL_CL_ICD_H +#define __OPENCL_CL_ICD_H + +#include +#include + +#define cl_khr_icd 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef cl_int(CL_API_CALL* clGetPlatformIDs_fn)( + cl_uint /* num_entries */, cl_platform_id* /* platforms */, + cl_uint* /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetPlatformInfo_fn)( + cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetDeviceIDs_fn)( + cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, + cl_device_id* /* devices */, cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetDeviceInfo_fn)( + cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context(CL_API_CALL* clCreateContext_fn)( + const cl_context_properties* /* properties */, cl_uint /* num_devices */, + const cl_device_id* /* devices */, + void(CL_CALLBACK* /* pfn_notify */)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_context(CL_API_CALL* clCreateContextFromType_fn)( + const cl_context_properties* /* properties */, cl_device_type /* device_type */, + void(CL_CALLBACK* /* pfn_notify*/)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainContext_fn)(cl_context /* context */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseContext_fn)(cl_context /* context */) + CL_API_SUFFIX__VERSION_1_0; + +typedef 
cl_int(CL_API_CALL* clGetContextInfo_fn)( + cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueue_fn)( + cl_context /* context */, cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetCommandQueueInfo_fn)( + cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetCommandQueueProperty_fn)( + cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties* /* old_properties */) /*CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED*/; + +typedef cl_mem(CL_API_CALL* clCreateBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateSubBuffer_fn)( + cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, + const void* /* buffer_create_info */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem(CL_API_CALL* clCreateImage2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, + void* /* host_ptr */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + 
+typedef cl_mem(CL_API_CALL* clCreateImage3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, + size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetSupportedImageFormats_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, cl_image_format* /* image_formats */, + cl_uint* /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetMemObjectInfo_fn)( + cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetImageInfo_fn)( + cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetMemObjectDestructorCallback_fn)( + cl_mem /* memobj */, + void(CL_CALLBACK* /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/), + void* /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Sampler APIs */ +typedef cl_sampler(CL_API_CALL* clCreateSampler_fn)( + cl_context /* context */, cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainSampler_fn)(cl_sampler /* sampler */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseSampler_fn)(cl_sampler /* sampler */) + 
CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetSamplerInfo_fn)( + cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +typedef cl_program(CL_API_CALL* clCreateProgramWithSource_fn)( + cl_context /* context */, cl_uint /* count */, const char** /* strings */, + const size_t* /* lengths */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context /* context */, + const void * /* strings */, size_t /* lengths */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_2_0; + +typedef cl_program(CL_API_CALL* clCreateProgramWithILKHR_fn)( + cl_context /* context */, const void* /* il */, size_t /* length */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clCreateProgramWithBinary_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const size_t* /* lengths */, const unsigned char** /* binaries */, cl_int* /* binary_status */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clBuildProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, + void(CL_CALLBACK* /* pfn_notify */)(cl_program /* program */, void* /* user_data */), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clUnloadCompiler_fn)(void) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetProgramInfo_fn)( + cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, + 
void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetProgramBuildInfo_fn)( + cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +typedef cl_kernel(CL_API_CALL* clCreateKernel_fn)( + cl_program /* program */, const char* /* kernel_name */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clCreateKernelsInProgram_fn)( + cl_program /* program */, cl_uint /* num_kernels */, cl_kernel* /* kernels */, + cl_uint* /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clRetainKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetKernelArg_fn)(cl_kernel /* kernel */, cl_uint /* arg_index */, + size_t /* arg_size */, const void* /* arg_value */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetKernelInfo_fn)( + cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetKernelWorkGroupInfo_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +typedef cl_int(CL_API_CALL* clWaitForEvents_fn)( + cl_uint /* num_events */, const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetEventInfo_fn)( + cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, + void* /* param_value 
*/, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event(CL_API_CALL* clCreateUserEvent_fn)( + cl_context /* context */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clRetainEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clReleaseEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clSetUserEventStatus_fn)( + cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clSetEventCallback_fn)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK* /* pfn_notify */)(cl_event, cl_int, void*), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +typedef cl_int(CL_API_CALL* clGetEventProfilingInfo_fn)( + cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +typedef cl_int(CL_API_CALL* clFlush_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clFinish_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +typedef cl_int(CL_API_CALL* clEnqueueReadBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + size_t /* offset */, size_t /* cb */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReadBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* 
host_row_pitch */, + size_t /* host_slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, + size_t /* offset */, size_t /* cb */, const void* /* ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, + size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, + const size_t* /* src_origin */, const size_t* /* dst_origin */, const size_t* /* region */, + size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueReadImage_fn)( + cl_command_queue /* 
command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* row_pitch */, + size_t /* slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, + const size_t* /* src_origin[3] */, const size_t* /* dst_origin[3] */, + const size_t* /* region[3] */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImageToBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, + const size_t* /* src_origin[3] */, const size_t* /* region[3] */, size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBufferToImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, + size_t /* src_offset */, const size_t* /* dst_origin[3] */, const size_t* /* region[3] */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* 
buffer */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, const size_t* /* origin[3] */, const size_t* /* region[3] */, + size_t* /* image_row_pitch */, size_t* /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueUnmapMemObject_fn)( + cl_command_queue /* command_queue */, cl_mem /* memobj */, void* /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNDRangeKernel_fn)( + cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, + const size_t* /* global_work_offset */, const size_t* /* global_work_size */, + const size_t* /* local_work_size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueTask_fn)(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNativeKernel_fn)( + cl_command_queue /* command_queue */, void(CL_CALLBACK* user_func)(void*), void* /* args */, + size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem* /* mem_list */, + const void** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list 
*/, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueMarker_fn)(cl_command_queue /* command_queue */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWaitForEvents_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events */, + const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueBarrier_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddress_fn)(const char* /* func_name */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, + int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLRenderbuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetGLObjectInfo_fn)( + cl_mem /* memobj */, cl_gl_object_type* /* gl_object_type */, + cl_GLuint* /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetGLTextureInfo_fn)( + cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event(CL_API_CALL* clCreateEventFromGLsyncKHR_fn)( 
+ cl_context /* context */, cl_GLsync /* cl_GLsync */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueAcquireGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReleaseGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clCreateSubDevices_fn)( + cl_device_id /* in_device */, const cl_device_partition_property* /* properties */, + cl_uint /* num_entries */, cl_device_id* /* out_devices */, + cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clRetainDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clReleaseDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateImage_fn)(cl_context /* context */, cl_mem_flags /* flags */, + const cl_image_format* /* image_format*/, + const cl_image_desc* /* image_desc*/, + void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clCreateProgramWithBuiltInKernels_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* kernel_names */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clCompileProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_headers */, + const cl_program* /* input_headers */, const char** /* header_include_names */, + 
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clLinkProgram_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_programs */, + const cl_program* /* input_programs */, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* /* user_data */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clUnloadPlatformCompiler_fn)(cl_platform_id /* platform */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clGetKernelArgInfo_fn)( + cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* offset */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, const void* /* fill_color */, + const size_t* /* origin */, const size_t* /* region */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMigrateMemObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, + const cl_mem* /* mem_objects */, cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMarkerWithWaitList_fn)( + 
cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueBarrierWithWaitList_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddressForPlatform_fn)( + cl_platform_id /* platform */, const char* /* funcname */)CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* texture_target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueueWithProperties_fn)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_sampler(CL_API_CALL* clCreateSamplerWithProperties_fn)( + cl_context /* context */, const cl_sampler_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef void*(CL_API_CALL* clSVMAlloc_fn)(cl_context /* context */, cl_svm_mem_flags /* flags */, + size_t /* size */, + cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef void(CL_API_CALL* clSVMFree_fn)(cl_context /* context */, + void* /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelArgSVMPointer_fn)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void* /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelExecInfo_fn)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, + const void* /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMFree_fn)( 
+ cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, + void(CL_CALLBACK* /* pfn_free_func */)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, void* /* user_data */), + void* /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemcpy_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void* /* dst_ptr */, + const void* /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemFill_fn)( + cl_command_queue /* command_queue */, void* /* svm_ptr */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMap_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */, + void* /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMUnmap_fn)(cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_mem(CL_API_CALL* clCreatePipe_fn)(cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, + cl_uint /* pipe_max_packets */, + const cl_pipe_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetPipeInfo_fn)( + cl_mem /* pipe */, cl_pipe_info /* 
param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetKernelSubGroupInfoKHR_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_sub_group_info /* param_name */, + size_t /* input_value_size */, const void* /* input_value */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + + +typedef cl_int(CL_API_CALL* clSetDefaultDeviceCommandQueue_fn)( + cl_context /* context */, cl_device_id /* device */, + cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_kernel(CL_API_CALL* clCloneKernel_fn)( + cl_kernel /* source_kernel */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clEnqueueSVMMigrateMem_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + const void ** /* svm_pointers */, const size_t * /* sizes */, + cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clGetDeviceAndHostTimer_fn)( + cl_device_id /* device */, cl_ulong * /* device_timestamp */, + cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clGetHostTimer_fn)( + cl_device_id /* device */, cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1; + +typedef cl_int (CL_API_CALL* clSetProgramSpecializationConstant_fn)( + cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */, + const void* /* spec_value */) CL_API_SUFFIX__VERSION_2_2; + +typedef cl_int (CL_API_CALL* clSetProgramReleaseCallback_fn)( + cl_program /* program */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program program, void * user_data), + void * /* user_data */) CL_API_SUFFIX__VERSION_2_2; + +typedef struct _cl_icd_dispatch_table { + /* 
OpenCL 1.0 */ + clGetPlatformIDs_fn GetPlatformIDs; + clGetPlatformInfo_fn GetPlatformInfo; + clGetDeviceIDs_fn GetDeviceIDs; + clGetDeviceInfo_fn GetDeviceInfo; + clCreateContext_fn CreateContext; + clCreateContextFromType_fn CreateContextFromType; + clRetainContext_fn RetainContext; + clReleaseContext_fn ReleaseContext; + clGetContextInfo_fn GetContextInfo; + clCreateCommandQueue_fn CreateCommandQueue; + clRetainCommandQueue_fn RetainCommandQueue; + clReleaseCommandQueue_fn ReleaseCommandQueue; + clGetCommandQueueInfo_fn GetCommandQueueInfo; + clSetCommandQueueProperty_fn SetCommandQueueProperty; + clCreateBuffer_fn CreateBuffer; + clCreateImage2D_fn CreateImage2D; + clCreateImage3D_fn CreateImage3D; + clRetainMemObject_fn RetainMemObject; + clReleaseMemObject_fn ReleaseMemObject; + clGetSupportedImageFormats_fn GetSupportedImageFormats; + clGetMemObjectInfo_fn GetMemObjectInfo; + clGetImageInfo_fn GetImageInfo; + clCreateSampler_fn CreateSampler; + clRetainSampler_fn RetainSampler; + clReleaseSampler_fn ReleaseSampler; + clGetSamplerInfo_fn GetSamplerInfo; + clCreateProgramWithSource_fn CreateProgramWithSource; + clCreateProgramWithBinary_fn CreateProgramWithBinary; + clRetainProgram_fn RetainProgram; + clReleaseProgram_fn ReleaseProgram; + clBuildProgram_fn BuildProgram; + clUnloadCompiler_fn UnloadCompiler; + clGetProgramInfo_fn GetProgramInfo; + clGetProgramBuildInfo_fn GetProgramBuildInfo; + clCreateKernel_fn CreateKernel; + clCreateKernelsInProgram_fn CreateKernelsInProgram; + clRetainKernel_fn RetainKernel; + clReleaseKernel_fn ReleaseKernel; + clSetKernelArg_fn SetKernelArg; + clGetKernelInfo_fn GetKernelInfo; + clGetKernelWorkGroupInfo_fn GetKernelWorkGroupInfo; + clWaitForEvents_fn WaitForEvents; + clGetEventInfo_fn GetEventInfo; + clRetainEvent_fn RetainEvent; + clReleaseEvent_fn ReleaseEvent; + clGetEventProfilingInfo_fn GetEventProfilingInfo; + clFlush_fn Flush; + clFinish_fn Finish; + clEnqueueReadBuffer_fn EnqueueReadBuffer; + 
clEnqueueWriteBuffer_fn EnqueueWriteBuffer; + clEnqueueCopyBuffer_fn EnqueueCopyBuffer; + clEnqueueReadImage_fn EnqueueReadImage; + clEnqueueWriteImage_fn EnqueueWriteImage; + clEnqueueCopyImage_fn EnqueueCopyImage; + clEnqueueCopyImageToBuffer_fn EnqueueCopyImageToBuffer; + clEnqueueCopyBufferToImage_fn EnqueueCopyBufferToImage; + clEnqueueMapBuffer_fn EnqueueMapBuffer; + clEnqueueMapImage_fn EnqueueMapImage; + clEnqueueUnmapMemObject_fn EnqueueUnmapMemObject; + clEnqueueNDRangeKernel_fn EnqueueNDRangeKernel; + clEnqueueTask_fn EnqueueTask; + clEnqueueNativeKernel_fn EnqueueNativeKernel; + clEnqueueMarker_fn EnqueueMarker; + clEnqueueWaitForEvents_fn EnqueueWaitForEvents; + clEnqueueBarrier_fn EnqueueBarrier; + clGetExtensionFunctionAddress_fn GetExtensionFunctionAddress; + clCreateFromGLBuffer_fn CreateFromGLBuffer; + clCreateFromGLTexture2D_fn CreateFromGLTexture2D; + clCreateFromGLTexture3D_fn CreateFromGLTexture3D; + clCreateFromGLRenderbuffer_fn CreateFromGLRenderbuffer; + clGetGLObjectInfo_fn GetGLObjectInfo; + clGetGLTextureInfo_fn GetGLTextureInfo; + clEnqueueAcquireGLObjects_fn EnqueueAcquireGLObjects; + clEnqueueReleaseGLObjects_fn EnqueueReleaseGLObjects; + clGetGLContextInfoKHR_fn GetGLContextInfoKHR; + void* _reservedForD3D10KHR[6]; + + /* OpenCL 1.1 */ + clSetEventCallback_fn SetEventCallback; + clCreateSubBuffer_fn CreateSubBuffer; + clSetMemObjectDestructorCallback_fn SetMemObjectDestructorCallback; + clCreateUserEvent_fn CreateUserEvent; + clSetUserEventStatus_fn SetUserEventStatus; + clEnqueueReadBufferRect_fn EnqueueReadBufferRect; + clEnqueueWriteBufferRect_fn EnqueueWriteBufferRect; + clEnqueueCopyBufferRect_fn EnqueueCopyBufferRect; + + void* _reservedForDeviceFissionEXT[3]; + clCreateEventFromGLsyncKHR_fn CreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + clCreateSubDevices_fn CreateSubDevices; + clRetainDevice_fn RetainDevice; + clReleaseDevice_fn ReleaseDevice; + clCreateImage_fn CreateImage; + clCreateProgramWithBuiltInKernels_fn 
CreateProgramWithBuiltInKernels; + clCompileProgram_fn CompileProgram; + clLinkProgram_fn LinkProgram; + clUnloadPlatformCompiler_fn UnloadPlatformCompiler; + clGetKernelArgInfo_fn GetKernelArgInfo; + clEnqueueFillBuffer_fn EnqueueFillBuffer; + clEnqueueFillImage_fn EnqueueFillImage; + clEnqueueMigrateMemObjects_fn EnqueueMigrateMemObjects; + clEnqueueMarkerWithWaitList_fn EnqueueMarkerWithWaitList; + clEnqueueBarrierWithWaitList_fn EnqueueBarrierWithWaitList; + clGetExtensionFunctionAddressForPlatform_fn GetExtensionFunctionAddressForPlatform; + clCreateFromGLTexture_fn CreateFromGLTexture; + + /* cl_khr_d3d11_sharing, cl_khr_dx9_media_sharing */ + void* _reservedForD3DExtensions[10]; + + /* cl_khr_egl_image, cl_khr_egl_event */ + void* _reservedForEGLExtensions[4]; + + /* OpenCL 2.0 */ + clCreateCommandQueueWithProperties_fn CreateCommandQueueWithProperties; + clCreatePipe_fn CreatePipe; + clGetPipeInfo_fn GetPipeInfo; + clSVMAlloc_fn SVMAlloc; + clSVMFree_fn SVMFree; + clEnqueueSVMFree_fn EnqueueSVMFree; + clEnqueueSVMMemcpy_fn EnqueueSVMMemcpy; + clEnqueueSVMMemFill_fn EnqueueSVMMemFill; + clEnqueueSVMMap_fn EnqueueSVMMap; + clEnqueueSVMUnmap_fn EnqueueSVMUnmap; + clCreateSamplerWithProperties_fn CreateSamplerWithProperties; + clSetKernelArgSVMPointer_fn SetKernelArgSVMPointer; + clSetKernelExecInfo_fn SetKernelExecInfo; + /* cl_khr_sub_groups */ + clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + clCloneKernel_fn CloneKernel; + clCreateProgramWithILKHR_fn CreateProgramWithILKHR; + clEnqueueSVMMigrateMem_fn EnqueueSVMMigrateMem; + clGetDeviceAndHostTimer_fn GetDeviceAndHostTimer; + clGetHostTimer_fn GetHostTimer; + clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo; + clSetDefaultDeviceCommandQueue_fn SetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + clSetProgramReleaseCallback_fn SetProgramReleaseCallback; + clSetProgramSpecializationConstant_fn SetProgramSpecializationConstant; + +} cl_icd_dispatch_table; + +#ifdef __cplusplus 
+} +#endif /* __cplusplus */ + +#endif /* __OPENCL_CL_ICD_H */ diff --git a/amdocl/cl_kernel.h b/amdocl/cl_kernel.h new file mode 100644 index 0000000000..e0c960d3ea --- /dev/null +++ b/amdocl/cl_kernel.h @@ -0,0 +1,165 @@ +/* Copyright (c) 2012-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_KERNEL_H_ +#define CL_KERNEL_H_ + +struct clk_builtins_t; + +// This must be a multiple of sizeof(cl_ulong16) +#define __CPU_SCRATCH_SIZE 128 + +#define CLK_PRIVATE_MEMORY_SIZE (16 * 1024) + +struct clk_thread_info_block_t { + // Warning! 
The size of this struct needs to be a multiple + // of 16 when compiling 64 bit + + struct clk_builtins_t const* builtins; + void* local_mem_base; + void* local_scratch; + const void* table_base; + size_t pad; + + uint work_dim; + size_t global_offset[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t global_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + + size_t enqueued_local_size[4]; + size_t local_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t local_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t group_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ +}; + +typedef enum clk_value_type_t { + T_VOID, + T_CHAR, + T_SHORT, + T_INT, + T_LONG, + T_FLOAT, + T_DOUBLE, + T_POINTER, + T_CHAR2, + T_CHAR3, + T_CHAR4, + T_CHAR8, + T_CHAR16, + T_SHORT2, + T_SHORT3, + T_SHORT4, + T_SHORT8, + T_SHORT16, + T_INT2, + T_INT3, + T_INT4, + T_INT8, + T_INT16, + T_LONG2, + T_LONG3, + T_LONG4, + T_LONG8, + T_LONG16, + T_FLOAT2, + T_FLOAT3, + T_FLOAT4, + T_FLOAT8, + T_FLOAT16, + T_DOUBLE2, + T_DOUBLE3, + T_DOUBLE4, + T_DOUBLE8, + T_DOUBLE16, + T_SAMPLER, + T_SEMA, + T_STRUCT, + T_QUEUE, + T_PAD +} clk_value_type_t; + +typedef enum clk_address_space_t { + A_PRIVATE, + A_LOCAL, + A_CONSTANT, + A_GLOBAL, + A_REGION +} clk_address_space_t; + +// kernel arg access qualifier and type qualifier +typedef enum clk_arg_qualifier_t { + Q_NONE = 0, + + // for image type only, access qualifier + Q_READ = 1, + Q_WRITE = 2, + + // for pointer type only + Q_CONST = 4, // pointee + Q_RESTRICT = 8, + Q_VOLATILE = 16, // pointee + Q_PIPE = 32 // pipe + +} clk_arg_qualifier_t; + +#pragma pack(push, 4) +struct clk_parameter_descriptor_t { + clk_value_type_t type; + clk_address_space_t space; + uint qualifier; + const char* name; +}; +#pragma pack(pop) + +//#define CLK_LOCAL_MEM_FENCE (1 << 0) +//#define CLK_GLOBAL_MEM_FENCE (1 << 1) + +struct clk_builtins_t { + /* Synchronization functions */ + void (*barrier_ptr)(cl_mem_fence_flags flags); + + /* AMD Only builtins: 
FIXME_lmoriche (extension) */ + void* reserved; + int (*printf_ptr)(const char* format, ...); +}; + +enum clk_natures_t { KN_HAS_BARRIER = 1 << 0, KN_WG_LEVEL = 1 << 1 }; + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4200) +#endif + +#if !defined(__OPENCL_VERSION__) || __OPENCL_VERSION__ >= 200 + +typedef struct clk_pipe_t { + size_t read_idx; + size_t write_idx; + size_t end_idx; + char padding[128 - 3 * sizeof(size_t)]; + char packets[]; +} clk_pipe_t; + +#endif + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif /*CL_KERNEL_H_*/ diff --git a/amdocl/cl_profile_amd.h b/amdocl/cl_profile_amd.h new file mode 100644 index 0000000000..7adca946e0 --- /dev/null +++ b/amdocl/cl_profile_amd.h @@ -0,0 +1,189 @@ +/* Copyright (c) 2009-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef __CL_PROFILE_AMD_H +#define __CL_PROFILE_AMD_H + +#include "CL/cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +typedef struct _cl_perfcounter_amd* cl_perfcounter_amd; +typedef cl_ulong cl_perfcounter_property; +typedef cl_uint cl_perfcounter_info; + +/* cl_perfcounter_info */ +enum PerfcounterInfo { + CL_PERFCOUNTER_NONE = 0x0, + CL_PERFCOUNTER_REFERENCE_COUNT = 0x1, + CL_PERFCOUNTER_DATA = 0x2, + CL_PERFCOUNTER_GPU_BLOCK_INDEX = 0x3, + CL_PERFCOUNTER_GPU_COUNTER_INDEX = 0x4, + CL_PERFCOUNTER_GPU_EVENT_INDEX = 0x5, + CL_PERFCOUNTER_LAST +}; + +/********************************* +* Set device clock mode data +*********************************/ +enum cl_DeviceClockMode_AMD { + CL_DEVICE_CLOCK_MODE_DEFAULT_AMD = 0x0, /*Device clocks and other power settings are restored to default*/ + CL_DEVICE_CLOCK_MODE_QUERY_AMD = 0x1, /*Queries the current device clock ratios. Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_PROFILING_AMD = 0x2, /*Scale down from peak ratio*/ + CL_DEVICE_CLOCK_MODE_MINIMUMMEMORY_AMD = 0x3, /* Memory clock is set to the lowest available level*/ + CL_DEVICE_CLOCK_MODE_MINIMUMENGINE_AMD = 0x4, /*Engine clock is set to the lowest available level*/ + CL_DEVICE_CLOCK_MODE_PEAK_AMD = 0x5, /*Clocks set to maximum when possible. Fan set to maximum.*/ + CL_DEVICE_CLOCK_MODE_QUERYPROFILING_AMD = 0x6, /*Queries the profiling device clock ratios. 
Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_QUERYPEAK_AMD = 0x7, /*Queries the peak device clock ratios. Leaves the clock mode of the device unchanged*/ + CL_DEVICE_CLOCK_MODE_COUNT_AMD = 0x8, /*Maximum count of device clock mode*/ +}; + +typedef struct _cl_set_device_clock_mode_input_amd +{ + /* specify the clock mode for AMD GPU device*/ + cl_DeviceClockMode_AMD clock_mode; +} cl_set_device_clock_mode_input_amd; + +typedef struct _cl_set_device_clock_mode_output_amd +{ + /*Ratio of current mem clock to peak clock as obtained from DeviceProperties::maxGpuClock*/ + cl_float memory_clock_ratio_to_peak; + /*Ratio of current gpu core clock to peak clock as obtained from DeviceProperties::maxGpuClock*/ + cl_float engine_clock_ratio_to_peak; +} cl_set_device_clock_mode_output_amd; + +/*! \brief Creates a new HW performance counter + * for the specified OpenCL context. + * + * \param device must be a valid OpenCL device. + * + * \param properties the list of properties of the hardware counter + * + * \param errcode_ret A non zero value if OpenCL failed to create PerfCounter + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_CONTEXT if the specified context is invalid. + * - CL_OUT_OF_RESOURCES if we couldn't create the object + * + * \return the created perfcounter object + */ +extern CL_API_ENTRY cl_perfcounter_amd CL_API_CALL clCreatePerfCounterAMD( + cl_device_id /* device */, cl_perfcounter_property* /* properties */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Destroy a performance counter object. + * + * \param perf_counter the perfcounter object for release + * + * \return A non zero value if OpenCL failed to release PerfCounter + * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_OPERATION if we failed to release the object + */ +extern CL_API_ENTRY cl_int CL_API_CALL clReleasePerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Increments the perfcounter object reference count. + * + * \param perf_counter the perfcounter object for retain + * + * \return A non zero value if OpenCL failed to retain PerfCounter + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_OPERATION if we failed to retain the object + */ +extern CL_API_ENTRY cl_int CL_API_CALL clRetainPerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the begin command for the specified counters. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param num_perf_counters the number of perfcounter objects in the array. + * + * \param perf_counters specifies an array of perfcounter objects. + * + * \return A non zero value if OpenCL failed to enqueue the begin command + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_OPERATION if we failed to enqueue the begin operation + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBeginPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the end command for the specified counters. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param num_perf_counters the number of perfcounter objects in the array. + * + * \param perf_counters specifies an array of perfcounter objects. + * + * \param event the event object associated with the end operation. + * + * \return A non zero value if OpenCL failed to enqueue the end command + * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_OPERATION if we failed to enqueue the end operation + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueEndPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Retrieves the results from the counter objects. + * + * \param perf_counter specifies a perfcounter objects for query. + * + * \param param_name specifies the information to query. + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type. + * + * \param param_value_size_ret returns the actual size in bytes of data copied + * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \param values must be a valid pointer to an array of 64-bit values + * and the array size must be equal to num_perf_counters. + * + * \return + * - CL_SUCCESS if the function is executed successfully. + * - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished. 
+ * - CL_INVALID_OPERATION if we failed to get the data + */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetPerfCounterInfoAMD( + cl_perfcounter_amd /* perf_counter */, cl_perfcounter_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, size_t* /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clSetDeviceClockModeAMD( + cl_device_id /* device*/, cl_set_device_clock_mode_input_amd /* Clock_Mode_Input */, + cl_set_device_clock_mode_output_amd* /* Clock_Mode_Output */ + ) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_PROFILE_AMD_H*/ diff --git a/amdocl/cl_thread_trace_amd.h b/amdocl/cl_thread_trace_amd.h new file mode 100644 index 0000000000..fe9aed6f34 --- /dev/null +++ b/amdocl/cl_thread_trace_amd.h @@ -0,0 +1,363 @@ +/* Copyright (c) 2012-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef __CL_THREAD_TRACE_AMD_H +#define __CL_THREAD_TRACE_AMD_H + +#include "CL/cl_platform.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +typedef struct _cl_threadtrace_amd* cl_threadtrace_amd; +typedef cl_uint cl_thread_trace_param; +typedef cl_uint cl_threadtrace_info; + +/* cl_command_type */ +#define CL_COMMAND_THREAD_TRACE_MEM 0x4500 +#define CL_COMMAND_THREAD_TRACE 0x4501 + +/* cl_threadtrace_command_name_amd enumeration */ +typedef enum _cl_threadtrace_command_name_amd { + CL_THREAD_TRACE_BEGIN_COMMAND, + CL_THREAD_TRACE_END_COMMAND, + CL_THREAD_TRACE_PAUSE_COMMAND, + CL_THREAD_TRACE_RESUME_COMMAND +} cl_threadtrace_command_name_amd; + +// Thread trace parameters +enum ThreadTraceParameter { + CL_THREAD_TRACE_PARAM_TOKEN_MASK, + CL_THREAD_TRACE_PARAM_REG_MASK, + CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET, + CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET, + CL_THREAD_TRACE_PARAM_SIMD_MASK, + CL_THREAD_TRACE_PARAM_VM_ID_MASK, + CL_THREAD_TRACE_PARAM_RANDOM_SEED, + CL_THREAD_TRACE_PARAM_CAPTURE_MODE, + CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK, + CL_THREAD_TRACE_PARAM_USER_DATA, + CL_THREAD_TRACE_PARAM_IS_WRAPPED +}; + +// CL_THREAD_TRACE_PARAM_TOKEN_MASK data selects for SI +enum CL_THREAD_TRACE_TOKEN_MASK { + // Time passed + CL_THREAD_TRACE_TOKEN_MASK_TIME_SI = 0x00000001, + // Resync the timestamp + CL_THREAD_TRACE_TOKEN_MASK_TIMESTAMP_SI = 0x00000002, + // A register write has occurred + CL_THREAD_TRACE_TOKEN_MASK_REG_SI = 0x00000004, + // A wavefront has started + CL_THREAD_TRACE_TOKEN_MASK_WAVE_START_SI = 0x00000008, + // Output space has been allocated for color/Z [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_PS_ALLOC_SI = 0x00000010, + // Output space has been allocated for vertex position [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_VS_ALLOC_SI = 0x00000020, + // Wavefront completion + CL_THREAD_TRACE_TOKEN_MASK_WAVE_END_SI = 0x00000040, + // An event has reached the top of a shader stage. 
In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_SI = 0x00000080, + // An event has reached the top of a compute shader stage. In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_CS_SI = 0x00000100, + // An event has reached the top of a shader stage for the second GFX pipe. In-order with + // WAVE_START. + //[Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_EVENT_GFX_SI = 0x00000200, + // The kernel has executed an instruction + CL_THREAD_TRACE_TOKEN_MASK_INST_SI = 0x00000400, + // The kernel has explicitly written the PC value + CL_THREAD_TRACE_TOKEN_MASK_INST_PC_SI = 0x00000800, + // The kernel has written user data into the thread trace buffer + CL_THREAD_TRACE_TOKEN_MASK_INST_USERDATA_SI = 0x00001000, + // Provides information about instruction scheduling + CL_THREAD_TRACE_TOKEN_MASK_ISSUE_SI = 0x00002000, + // The performance counter delta has been updated + CL_THREAD_TRACE_TOKEN_MASK_PERF_SI = 0x00004000, + // A miscellaneous event has been sent + CL_THREAD_TRACE_TOKEN_MASK_MISC_SI = 0x00008000, + // All possible tokens + CL_THREAD_TRACE_TOKEN_MASK_ALL_SI = 0x0000ffff, +}; + +// CL_THREAD_TRACE_PARAM_REG_MASK data selects +enum CL_THREAD_TRACE_REG_MASK { + // Event initiator + CL_THREAD_TRACE_REG_MASK_EVENT_SI = 0x00000001, + // Draw initiator [Should be used for cl-gl] + CL_THREAD_TRACE_REG_MASK_DRAW_SI = 0x00000002, + // Dispatch initiator + CL_THREAD_TRACE_REG_MASK_DISPATCH_SI = 0x00000004, + // User data from host + CL_THREAD_TRACE_REG_MASK_USERDATA_SI = 0x00000008, + // GFXDEC register (8-state) [Should be used for cl-gl] + CL_THREAD_TRACE_REG_MASK_GFXDEC_SI = 0x00000020, + // SHDEC register (many state) + CL_THREAD_TRACE_REG_MASK_SHDEC_SI = 0x00000040, + // Other registers + CL_THREAD_TRACE_REG_MASK_OTHER_SI = 0x00000080, + // All possible registers types + CL_THREAD_TRACE_REG_MASK_ALL_SI = 0x000000ff, +}; + +// CL_THREAD_TRACE_PARAM_VM_ID_MASK data selects +enum CL_THREAD_TRACE_VM_ID_MASK { + // Capture only data from the 
VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE = 0, + // Capture all data from all VM_IDs + CL_THREAD_TRACE_VM_ID_MASK_ALL = 1, + // Capture all data but only get target (a.k.a. detail) data from VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 2 +}; + +// CL_THREAD_TRACE_PARAM_CAPTURE_MODE data +enum CL_THREAD_TRACE_CAPTURE_MODE { + // Capture all data in the thread trace buffer + CL_THREAD_TRACE_CAPTURE_ALL = 0, + // Capture only data between THREAD_TRACE_START and THREAD_TRACE_STOP events + CL_THREAD_TRACE_CAPTURE_SELECT = 1, + // Capture data between THREAD_TRACE_START and THREAD_TRACE_STOP events, + // and global/reference data at all times + CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL = 2 +}; + +// CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK data selects +enum CL_THREAD_TRACE_INSTRUCTION_MASK { + // Generate {SQTT}_TOKEN_INST tokens for all instructions + CL_THREAD_TRACE_INST_MASK_ALL, + // Generate {SQTT}_TOKEN_INST tokens for stalled instructions only + CL_THREAD_TRACE_INST_MASK_STALLED, + // Generate {SQTT}_TOKEN_INST messages for stalled and other (no op/wait/set prio/etc) + // instructions + CL_THREAD_TRACE_INST_MASK_STALLED_AND_IMMEDIATE, + // Generate {SQTT}_TOKEN_INST messages for immediate instructions only [Should be used only + // for CI] + CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI, +}; + +enum ThreadTraceInfo { + CL_THREAD_TRACE_SE, + CL_THREAD_TRACE_BUFFERS_FILLED, + CL_THREAD_TRACE_BUFFERS_SIZE +}; + + +/*! \brief Creates a new cl_threadtrace_amd object + * + * \param device must be a valid OpenCL device. + * + * \param errcode_ret A non zero value if OpenCL failed to create threadTrace + * -CL_INVALID_DEVICE if devices contains an invalid device. + * -CL_DEVICE_NOT_AVAILABLE if a device is currently not available even + * though the device was returned by clGetDeviceIDs. + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + * OpenCL implementation on the device.
+ * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + * + * \return the created threadTrace object + */ +extern CL_API_ENTRY cl_threadtrace_amd CL_API_CALL clCreateThreadTraceAMD( + cl_device_id /* device */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Destroys a cl_threadtrace_amd object. + * + * \param threadTrace the cl_threadtrace_amd object for release + * + * \return A non zero value if OpenCL failed to release threadTrace + * -CL_INVALID_VALUE if the thread_trace is not a valid OpenCL thread trace object + (cl_threadtrace_amd) . + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + * OpenCL implementation on the device. + * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Increments the cl_threadtrace_amd object reference count. + * + * \param threadTrace the cl_threadtrace_amd object for retain + * + * \return A non zero value if OpenCL failed to retain threadTrace + * -CL_INVALID_VALUE if the thread_trace is not a valid thread trace object (cl_threadtrace_amd) . + * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the + OpenCL implementation on the device. + * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clRetainThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Sets the cl_threadtrace_amd object configuration parameter. 
+ * + * \param thread_trace the cl_threadtrace_amd object to set configuration parameter + * + * \param config_param the cl_thread_trace_param + * + * \param param_value corresponding to configParam + * + * \return A non zero value if OpenCL failed to set threadTrace buffer parameter + * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. + * - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + event_wait_list is not NULL and num_events_in_wait_list is 0, + * - or if event objects in event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + OpenCL implementation on the host. + */ + +extern CL_API_ENTRY cl_int CL_API_CALL clSetThreadTraceParamAMD( + cl_threadtrace_amd /*thread_trace*/, cl_thread_trace_param /*config_param*/, + cl_uint /*param_value*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/* \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace + * recording.. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param thread_trace specifies the cl_threadtrace_amd object. + * + * \param mem_objects the cl_mem objects for trace recording + * + * \param mem_objects_num the number of cl_mem objects in the mem_objects + * + * \param buffer_size the size of each cl_mem object from mem_objects + * + * \param event_wait_list specify [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. 
If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. + * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * \return A non zero value if OpenCL failed to set threadTrace buffer parameter + * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. + * - CL_INVALID_VALUE if the thread_trace is not a valid thread trace object. + * - CL_INVALID_VALUE if the buffer_size is negative or zero. + * - CL_INVALID_VALUE if the sub_buffers_num is less than 1. + * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of + * the [GPU] device. + * - CL_INVALID_MEM_OBJECT if one of the memory objects in the mem_objects array is not a valid memory + * object or mem_objects is NULL. + * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store + * associated with the memory objects of the mem_objects array. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events.
+ * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + * OpenCL implementation on the host. + */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBindThreadTraceBufferAMD( + cl_command_queue command_queue, cl_threadtrace_amd /*thread_trace*/, cl_mem* /*mem_objects*/, + cl_uint /*mem_objects_num*/, cl_uint /*buffer_size*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Get specific information about the OpenCL Thread Trace. + * + * \param thread_trace_info_param is an enum that identifies the Thread Trace information being + * queried. + * + * \param param_value is a pointer to memory location where appropriate values + * for a given \a threadTrace_info_param will be returned. If \a param_value is NULL, + * it is ignored. + * + * \param param_value_size specifies the size in bytes of memory pointed to by + * \a param_value. This size in bytes must be >= size of return type. + * + * \param param_value_size_ret returns the actual size in bytes of data being + * queried by param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \return One of the following values: + * CL_INVALID_OPERATION if cl_threadtrace_amd object is not valid + * - CL_INVALID_VALUE if \a param_name is not one of the supported + * values or if size in bytes specified by \a param_value_size is < size of + * return type and \a param_value is not a NULL value. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the + * OpenCL implementation on the host. + * CL_SUCCESS if the function is executed successfully. 
+ */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetThreadTraceInfoAMD( + cl_threadtrace_amd /* thread_trace */, cl_threadtrace_info /*thread_trace_info_param*/, + size_t /*param_value_size*/, void* /*param_value*/, size_t* /*param_value_size_ret*/ + ) CL_API_SUFFIX__VERSION_1_0; + +/*! \brief Enqueues the thread trace command for the specified thread trace object. + * + * \param command_queue must be a valid OpenCL command queue. + * + * \param threadTraces specifies an array of cl_threadtrace_amd objects. + * + * \return A non zero value if OpenCL failed to release threadTrace + * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. + * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object . + * - CL_INVALID_VALUE if the invalid command name enum value , not described in the + * cl_threadtrace_command_name_amd, is used. + * - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases: + * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not + * bound.. + * o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. + * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not + * queued. + * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not + * queued. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL + * implementation on the host. 
+ */ +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueThreadTraceCommandAMD( + cl_command_queue /*command_queue*/, cl_threadtrace_amd /*thread_trace*/, + cl_threadtrace_command_name_amd /*command_name*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; + + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif /*__CL_THREAD_TRACE_AMD_H*/ diff --git a/amdocl/gl_functions.hpp b/amdocl/gl_functions.hpp new file mode 100644 index 0000000000..2d184bc2e6 --- /dev/null +++ b/amdocl/gl_functions.hpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +GLPREFIX(GLubyte*, glGetString, (GLenum name)) + +GLPREFIX(void, glBindBuffer, (GLenum target, GLuint buffer)) +//GLPREFIX(void, glBindFramebufferEXT, (GLenum target, GLuint framebuffer)) +GLPREFIX(void, glBindRenderbuffer, (GLenum target, GLuint renderbuffer)) +GLPREFIX(void, glBindTexture, (GLenum target, GLuint texture)) +GLPREFIX(void, glBufferData, (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage)) + +GLPREFIX(GLenum, glCheckFramebufferStatusEXT, (GLenum target)) + +GLPREFIX(void, glDeleteBuffers, (GLsizei n, const GLuint* buffers)) +GLPREFIX(void, glDrawPixels, (GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels)) + +//GLPREFIX(void, glFramebufferRenderbufferEXT, (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer)) + +GLPREFIX(void, glGenBuffers, (GLsizei n, GLuint* buffers)) +//GLPREFIX(void, glGenFramebuffersEXT, (GLsizei n, GLuint* framebuffers)) +//10 +GLPREFIX(void, glGetBufferParameteriv, (GLenum target, GLenum pname, GLint* params)) +GLPREFIX(GLenum, glGetError, (void)) +GLPREFIX(void, glFinish, (void)) +GLPREFIX(void, glFlush, (void)) +GLPREFIX(GLenum, glClientWaitSync, (GLsync sync, GLbitfield flags, GLuint64 timeout)) +GLPREFIX(void, glGetIntegerv, (GLenum pname, GLint *params)) +GLPREFIX(void, glGetRenderbufferParameterivEXT, (GLenum target, GLenum pname, GLint* params)) +//GLPREFIX(GLubyte*, glGetString, (GLenum name)) +GLPREFIX(void, glGetTexImage, (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels)) +GLPREFIX(void, glGetTexLevelParameteriv, (GLenum target, GLint level, GLenum pname, GLint *params)) +GLPREFIX(void, glGetTexParameteriv, (GLenum target, GLenum pname, GLint *params)) + +GLPREFIX(GLboolean, glIsBuffer, (GLuint buffer)) +GLPREFIX(GLboolean, glIsRenderbufferEXT, (GLuint renderbuffer)) +GLPREFIX(GLboolean, glIsTexture, (GLuint texture)) +//20 +GLPREFIX(GLvoid*, glMapBuffer, (GLenum target, GLenum access)) + +GLPREFIX(void, 
glReadPixels, (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels)) + +GLPREFIX(void, glTexImage2D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels)) +GLPREFIX(void, glTexImage3D, (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels)) + +GLPREFIX(GLboolean, glUnmapBuffer, (GLenum target)) + +#undef GLPREFIX diff --git a/amdocl/icd/loader/icd_dispatch.h b/amdocl/icd/loader/icd_dispatch.h new file mode 100644 index 0000000000..84a3e305a7 --- /dev/null +++ b/amdocl/icd/loader/icd_dispatch.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016-2019 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. 
+ */ + +#ifndef _ICD_DISPATCH_H_ +#define _ICD_DISPATCH_H_ + +#ifndef CL_USE_DEPRECATED_OPENCL_1_0_APIS +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif + +#ifndef CL_USE_DEPRECATED_OPENCL_2_0_APIS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif + +// cl.h +#include <CL/cl.h> + +// cl_gl.h and required files +#ifdef _WIN32 +#include <windows.h> +#include <d3d9.h> +#include <d3d10_1.h> +#include <CL/cl_d3d10.h> +#include <CL/cl_d3d11.h> +#include <CL/cl_dx9_media_sharing.h> +#endif +#include <CL/cl_gl.h> +#include <CL/cl_gl_ext.h> +#include <CL/cl_ext.h> +#include <CL/cl_egl.h> +#include <CL/cl_icd.h> + +/* + * + * vendor dispatch table structure + * + */ + +struct _cl_platform_id +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_device_id +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_context +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_command_queue +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_mem +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_program +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_kernel +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_event +{ + cl_icd_dispatch *dispatch; +}; + +struct _cl_sampler +{ + cl_icd_dispatch *dispatch; +}; + +#endif // _ICD_DISPATCH_H_ + diff --git a/bin/hipcc b/bin/hipcc index 74ca844629..8214e508a6 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -222,12 +222,12 @@ if ($HIP_PLATFORM eq "clang") { $HIPCXXFLAGS .= " -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1"; } - if ($HIP_RUNTIME eq "HCC" ) { - $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; - $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; - $HIPCFLAGS .= " -isystem $HSA_PATH/include"; - } else { - $HIPCXXFLAGS .= " -fhip-new-launch-api"; + $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; + $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; + if (!($HIP_RUNTIME eq "HCC")) { + $HIPCXXFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; + $HIPCFLAGS .= " -D__HIP_VDI__
-fhip-new-launch-api"; } } elsif ($HIP_PLATFORM eq "hcc") { @@ -245,6 +245,9 @@ if ($HIP_PLATFORM eq "clang") { $HCC_VERSION_MAJOR=$HCC_VERSION; $HCC_VERSION_MAJOR=~s/\..*//; + $HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1; + $marker_path = "$ROCM_PATH/profiler/CXLActivityLogger"; + # HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below) $HCC = "$HCC_HOME/bin/hcc"; $HCCFLAGS = "-hc -D__HIPCC__ -isystem $HCC_HOME/include "; @@ -292,6 +295,20 @@ if ($HIP_PLATFORM eq "clang") { $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; + # Add trace marker library: + # TODO - once we cleanly separate the HIP API headers from HIP library headers this logic should move to CMakebuild option - apps do not need to see the marker library. + if ($HIP_ATP_MARKER) { + $marker_inc_path = "$marker_path/include"; + if (-e $marker_inc_path) { + $HIPCXXFLAGS .= " -isystem $marker_inc_path"; + } + } + + $marker_lib_path = "$marker_path/bin/x86_64"; + if (-e $marker_lib_path) { + $HIPLDFLAGS .= " -L$marker_lib_path -lCXLActivityLogger -Wl,--rpath=$marker_lib_path"; + } + if (not $isWindows) { $HIPLDFLAGS .= " -lm"; } @@ -336,7 +353,10 @@ my $runCmd = 1; my $buildDeps = 0; my $linkType = 1; my $setLinkType = 0; -my $coFormatv3 = 0; +my $coFormatv3 = 1; +if(defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") { + $coFormatv3 = 0; +} my $funcSupp = 0; # enable function support my @options = (); @@ -474,13 +494,6 @@ foreach $arg (@ARGV) $optArg = $arg; } - ## This is a temporary workaround for CMake detection of OpenMP support. - ## It should be removed when the OpenMP detection c++ test in CMake is updated - ## and corrected CMake version is available. 
- if((defined $HIP_COMPILER) and ($HIP_COMPILER eq "clang") and ($arg eq '-fopenmp')) { - $HIPCXXFLAGS .= " -D_OPENMP " - } - ## process linker response file for hip-clang ## extract object files from static library and pass them directly to ## hip-clang in command line. diff --git a/bin/hipify-perl b/bin/hipify-perl index baaca2ae71..83e72ee711 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -223,6 +223,10 @@ sub simpleSubstitutions { $ft{'memory'} += s/\bcuMemcpy2DAsync\b/hipMemcpyParam2DAsync/g; $ft{'memory'} += s/\bcuMemcpy2DAsync_v2\b/hipMemcpyParam2DAsync/g; $ft{'memory'} += s/\bcuMemcpy2D_v2\b/hipMemcpyParam2D/g; + $ft{'memory'} += s/\bcuMemcpy3D\b/hipDrvMemcpy3D/g; + $ft{'memory'} += s/\bcuMemcpy3DAsync\b/hipDrvMemcpy3DAsync/g; + $ft{'memory'} += s/\bcuMemcpy3D_v2\b/hipDrvMemcpy3D/g; + $ft{'memory'} += s/\bcuMemcpy3DAsync_v2\b/hipDrvMemcpy3DAsync/g; $ft{'memory'} += s/\bcuMemcpyAtoH\b/hipMemcpyAtoH/g; $ft{'memory'} += s/\bcuMemcpyAtoH_v2\b/hipMemcpyAtoH/g; $ft{'memory'} += s/\bcuMemcpyDtoD\b/hipMemcpyDtoD/g; @@ -979,6 +983,8 @@ sub simpleSubstitutions { $ft{'type'} += s/\bCUDA_ARRAY_DESCRIPTOR_st\b/HIP_ARRAY_DESCRIPTOR/g; $ft{'type'} += s/\bCUDA_MEMCPY2D\b/hip_Memcpy2D/g; $ft{'type'} += s/\bCUDA_MEMCPY2D_st\b/hip_Memcpy2D/g; + $ft{'type'} += s/\bCUDA_MEMCPY3D\b/HIP_MEMCPY3D/g; + $ft{'type'} += s/\bCUDA_MEMCPY3D_st\b/HIP_MEMCPY3D/g; $ft{'type'} += s/\bCUaddress_mode\b/hipTextureAddressMode/g; $ft{'type'} += s/\bCUaddress_mode_enum\b/hipTextureAddressMode/g; $ft{'type'} += s/\bCUarray\b/hipArray */g; diff --git a/cmake/FindROCR.cmake b/cmake/FindROCR.cmake new file mode 100644 index 0000000000..2b198dcf8f --- /dev/null +++ b/cmake/FindROCR.cmake @@ -0,0 +1,16 @@ +# Try to find ROCR (Radeon Open Compute Runtime) +# +# Once found, this will define: +# - ROCR_FOUND - ROCR status (found or not found) +# - ROCR_INCLUDES - Required ROCR include directories +# - ROCR_LIBRARIES - Required ROCR libraries +find_path(FIND_ROCR_INCLUDES hsa.h HINTS 
/opt/rocm/include /opt/rocm/hsa/include PATH_SUFFIXES hsa) +find_library(FIND_ROCR_LIBRARIES hsa-runtime64 HINTS /opt/rocm/lib /opt/rocm/hsa/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCR DEFAULT_MSG + FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) +mark_as_advanced(FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) + +set(ROCR_INCLUDES ${FIND_ROCR_INCLUDES}) +set(ROCR_LIBRARIES ${FIND_ROCR_LIBRARIES}) diff --git a/cmake/FindROCT.cmake b/cmake/FindROCT.cmake new file mode 100644 index 0000000000..37f08fcff7 --- /dev/null +++ b/cmake/FindROCT.cmake @@ -0,0 +1,16 @@ +# Try to find ROCT (Radeon Open Compute Thunk) +# +# Once found, this will define: +# - ROCT_FOUND - ROCT status (found or not found) +# - ROCT_INCLUDES - Required ROCT include directories +# - ROCT_LIBRARIES - Required ROCT libraries +find_path(FIND_ROCT_INCLUDES hsakmt.h HINTS /opt/rocm/include) +find_library(FIND_ROCT_LIBRARIES hsakmt HINTS /opt/rocm/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCT DEFAULT_MSG + FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) +mark_as_advanced(FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) + +set(ROCT_INCLUDES ${FIND_ROCT_INCLUDES}) +set(ROCT_LIBRARIES ${FIND_ROCT_LIBRARIES}) diff --git a/configure b/configure new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/markdown/hip_profiling.md b/docs/markdown/hip_profiling.md new file mode 100644 index 0000000000..28ed37e321 --- /dev/null +++ b/docs/markdown/hip_profiling.md @@ -0,0 +1,279 @@ +# Profiling HIP Code + +This section describes the profiling and debugging capabilities that HIP provides. +Profiling information can be viewed in the CodeXL visualization tool or printed directly to stderr as the application runs. +This document starts with some of the general capabilities of CodeXL and then describes some of the additional HIP marker and debug features.
+ + + +- [CodeXL Profiling](#codexl-profiling) + * [Collecting and Viewing Traces](#collecting-and-viewing-traces) + + [Using rocm-profiler timestamp profiling](#using-rocm-profiler-timestamp-profiling) + + [Using rocm-profiler performance counter collection:](#using-rocm-profiler-performance-counter-collection) + + [Using CodeXL to view profiling results:](#using-codexl-to-view-profiling-results) + + [More information on CodeXL](#more-information-on-codexl) + * [HIP Markers](#hip-markers) + + [Profiling HIP APIs](#profiling-hip-apis) + + [Adding markers to applications](#adding-markers-to-applications) + * [Additional HIP Profiling Features](#additional-hip-profiling-features) + + [Demangling C++ Kernel Names](#demangling-c-kernel-names) + + [Controlling when profiling starts and ends](#controlling-when-profiling-starts-and-ends) + + [Reducing timeline trace output file size](#reducing-timeline-trace-output-file-size) + + [How to enable profiling at HIP build time](#how-to-enable-profiling-at-hip-build-time) +- [Tracing and Debug](#tracing-and-debug) + * [Tracing HIP APIs](#tracing-hip-apis) + + [Color](#color) + + + +## CodeXL Profiling + +### Collecting and Viewing Traces + +#### Using rocm-profiler timestamp profiling +rocm-profiler is a command-line tool for tracing any application that uses ROCr API, including HCC and HIP. +rocm-profiler's timeline trace will show the beginning and end for all kernel commands, data transfer commands, and HSA Runtime (ROCr) API calls. The trace results are saved into a file, which by convention uses the "atp" extension. Here is an example that shows how to run the command-line profiler: +```shell +$ /opt/rocm/bin/rocm-profiler -o -A -T +``` + +#### Using rocm-profiler performance counter collection: +rocm-profiler can record performance counter information to provide greater insight inside a kernel, such as the memory bandwidth, ALU busy percentage, and cache statistics. 
+Collecting the common set of useful counters requires passing the counter configuration files for two passes: +``` +$ /opt/rocm/bin/rocm-profiler -C -O --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass1 --counterfile /opt/rocm/profiler/counterfiles/counters_HSA_Fiji_pass2 +``` + + +#### Using CodeXL to view profiling results: +The trace can be loaded and viewed in the CodeXL visualization tool: + +- Open the CodeXL GUI, create a new project, and switch to "Profile Mode": + - $ CodeXL & + - [File->New Project, leave fields as is, just click "OK"] + - [Profile->Switch to Profile Mode] +- Load timestamp tracing results into a timeline view: + - Right click on the project in the CodeXL Explorer view + - Click "Import Session..." + - Select $HOME/apitrace.atp (or appropriate .atp file if you used another file name) + +- Load the performance counter results + - Right click on the project in the CodeXL Explorer view + - Click "Import Session..." + - Select $HOME/Session1.csv (or appropriate .csv file if you used another file name) + + +#### More information on CodeXL +rocm-profiler --help will show additional options and usage guidelines. + +See this [blog](http://gpuopen.com/getting-up-to-speed-with-the-codexl-gpu-profiler-and-radeon-open-compute/) for more information on profiling ROCm apps (including HIP) with CodeXL. + +The 2.2 version of Windows CodeXL does not correctly handle Linux line-endings. If you are collecting a trace on Linux and then viewing it with the 2.2 Windows CodeXL, first convert the line endings in the .atp file to Windows-style line endings. + +### HIP Markers +#### Profiling HIP APIs +HIP can generate markers at function beginning and end which are displayed on the CodeXL timeline view.
+HIP 1.0 compiles marker support by default, and you can enable it by setting the HIP_PROFILE_API environment variable and then running the rocm-profiler: + +```shell + +# Use profile to generate timeline view: +export HIP_PROFILE_API=1 +$ /opt/rocm/bin/rocm-profiler -A -T + +Or +$ /opt/rocm/bin/rocm-profiler -e HIP_PROFILE_API=1 -A -T +``` + +HIP_PROFILE_API supports two levels of information. +- HIP_PROFILE_API=1 : Short format. Print name of API but no arguments. For example: +`hipMemcpy` +- HIP_PROFILE_API=2 : Long format. Print name of API + values of all function arguments. For example: +`hipMemcpy (0x7f32154db010, 0x50446e000, 4000000, hipMemcpyDeviceToHost)` + +#### Adding markers to applications + +Markers can be used to define application-specific events that will be recorded in the ATP file and displayed in the CodeXL GUI. +This can be particularly useful for visualizing how the higher-level phases of application behavior relate to the lower level HIP APIs, kernel launches, and data transfers. +For example, an instrumented machine learning framework could show the beginning and ending of each layer in the network. + +Markers have a specific begin and end time, and can be nested. Nested calls are displayed hierarchically in the CodeXL GUI, with each level of the hierarchy occupying a different row. + +The HIP APIs are defined in "hip_profile.h": +``` +#include <hip/hip_profile.h> + +HIP_BEGIN_MARKER(const char *markerName, const char *groupName); +HIP_END_MARKER(); + +HIP_BEGIN_MARKER("Setup", "MyAppGroup"); +// ... +// application code for setup +// ... +HIP_END_MARKER(); +``` + +For C++ codes, HIP also provides a scoped marker which records the start time when constructed and the end time when the scoped marker is destructed at the end of the scope. This provides a convenient, single-line mechanism to record an event that neatly corresponds to a region of code. + +```cxx +void FunctionFoo(...)
+{ + HIP_SCOPED_MARKER("FunctionFoo", "MyAppGroup"); // Marker starts recording here. + + // ... + // Function implementation + // ... + + // Marker destroyed here and records end time stamp. +}; +``` + +The HIP marker API is only supported on the ROCm platform. The marker macros are defined on CUDA platforms and will compile, but are silently ignored at runtime. + +This [HIP sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/2_Cookbook/2_Profiler) shows the profiler marker API used in a small application. + +More information on the marker API can be found in the profiler header file and PDF in a ROCm installation: +- /opt/rocm/profiler/CXLActivityLogger/include/CXLActivityLogger.h +- /opt/rocm/profiler/CXLActivityLogger/doc/CXLActivityLogger.pdf + +### Additional HIP Profiling Features +#### Demangling C++ Kernel Names +HIP includes the `hipdemangleatp` tool which can post-process an ATP file to "demangle" C++ names. +Mangled kernel names encode the C++ arguments and other information, and are guaranteed to be unique even for cases such as operator overloading. However, the mangled names can be quite verbose. For example: + +`ZZ39gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4RN2hc16accelerator_viewEPKflS3_lPfliiiiiiffEN3_EC__719__cxxamp_trampolineElililiiiiiiS3_iS3_S4_ff` + +`hipdemangleatp` will convert this into the more readable: +`gemm_NoTransA_MICRO_NBK_M_N_K_TS16XMTS4` + +The `hipdemangleatp` tool operates on the ATP file "in-place" and thus replaces the input file with the demangled version. + +``` +$ hipdemangleatp myfile.atp +``` + +The kernel name is also shown in some of the summary html files (Top10 kernels). These can be regenerated from the demangled ATP file by re-running rocm-profiler: +``` +$ rocm-profiler -T --atpfile myfile.atp +``` + +A future version of CodeXL may directly integrate demangle functionality.
+ + +#### Controlling when profiling starts and ends +hipProfilerStart() and hipProfilerStop() can be inserted into an application to control which phases of the application are profiled. +These APIs can be used to skip initialization code or to focus profiling on a desired region, and are particularly useful for large long-running applications. +See the API documentation for more information. These APIs work on both ROCm and CUDA paths. + +On ROCm, the following environment variables can be used to control when profiling occurs: + +``` +HIP_DB_START_API : Comma-separated list of tid.api_seq_num for when to start debug and profiling. +HIP_DB_STOP_API : Comma-separated list of tid.api_seq_num for when to stop debug and profiling. +``` + +HIP/ROCm assigns a monotonically increasing sequence number to the APIs called from each thread. The thread and API sequence number can be used in the above environment variables to control when tracing starts and stops. These flags also control the HIP_DB messages (described below). + +When using these options, start the profiler with profiling disabled: +``` +# ROCm: +$ rocm-profiler --startdisabled ... + +# CUDA: +$ nvprof --profile-from-start-off ... +``` + +This feature is under development. + +#### Reducing timeline trace output file size +If the application is already recording the HIP APIs, the HSA APIs are somewhat redundant and the ATP file size can be substantially reduced by not recording these APIs. HIP includes a text file that lists all of the HSA APIs and can assist in this filtering: + +``` +$ rocm-profiler -F hip/bin/hsa-api-filter-cxl.txt +``` + +This file can be copied and edited to provide more selective HSA event recording. + + +#### How to enable profiling at HIP build time +Pre-built packages of HIP are not built with profiling support enabled. You must enable marker support manually when compiling HIP. + +1. Build HIP with ATP markers enabled +HIP pre-built packages are enabled with ATP marker support by default.
+To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step. Build and install HIP. +```shell +$ mkdir build && cd build +$ cmake .. -DCOMPILE_HIP_ATP_MARKER=1 +$ make install +``` + +2. Install ROCm-Profiler +Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages installs the ROCm-Profiler as well. +Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler). + +3. Recompile the target application + +Then follow the steps above to collect a marker-enabled trace. + + +## Tracing and Debug + +### Tracing HIP APIs +The HIP runtime can print the HIP function strings to stderr using the HIP_TRACE_API environment variable. +The trace prints two messages for each API - one at the beginning of the API call (line starts with "<<") and one at the end of the API call (line ends with ">>"). +Here's an example for one API followed by a description for the sections of the trace: + +``` +<> +``` + +- `<> +info: running on device gfx803 +info: allocate host mem ( 7.63 MB) +info: allocate device mem ( 7.63 MB) +<> +<> +info: copy Host2Device +<> +info: launch 'vector_square' kernel +1.5 hipLaunchKernel 'HIP_KERNEL_NAME(vector_square)' gridDim:{512,1,1} groupDim:{256,1,1} sharedMem:+0 stream#0.0 +info: copy Device2Host +<> +info: check result +PASSED! +``` + +HIP_TRACE_API supports multiple levels of debug information: + - 0x1 = print all HIP APIs. This is the most verbose setting; the flags below allow selecting a subset. + - 0x2 = print HIP APIs which initiate GPU kernel commands. Includes hipLaunchKernel, hipLaunchModuleKernel + - 0x4 = print HIP APIs which initiate GPU memory commands. Includes hipMemcpy*, hipMemset*. + - 0x8 = print HIP APIs which allocate or free memory. Includes hipMalloc, hipHostMalloc, hipFree, hipHostFree.
+ +These can be combined. For example, HIP_TRACE_API=6 shows a concise view of the HIP commands (both kernel and memory) that are sent to the GPU. + + +#### Color +Note this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors. +You can change the color used for the trace mode with the HIP_TRACE_API_COLOR environment variable. Possible values are None/Red/Green/Yellow/Blue/Magenta/Cyan/White. +None will disable use of color control codes for both the opening and closing and may be useful when saving the trace file or when a pure text trace is desired. + + + diff --git a/hip-config.cmake.in b/hip-config.cmake.in index ccfbf2b04f..baa7c1607f 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -137,11 +137,11 @@ if(HIP_COMPILER STREQUAL "clang") ) set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." ) set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." ) foreach(GPU_TARGET ${GPU_TARGETS}) diff --git a/include/hip/hcc_detail/channel_descriptor.h b/include/hip/hcc_detail/channel_descriptor.h index 38acff9951..a69558c8e4 100644 --- a/include/hip/hcc_detail/channel_descriptor.h +++ b/include/hip/hcc_detail/channel_descriptor.h @@ -29,8 +29,14 @@ THE SOFTWARE. 
#ifdef __cplusplus +#if __HIP_VDI__ +extern "C" { +#endif HIP_PUBLIC_API hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); +#if __HIP_VDI__ +} +#endif static inline hipChannelFormatDesc hipCreateChannelDescHalf() { int e = (int)sizeof(unsigned short) * 8; diff --git a/include/hip/hcc_detail/driver_types.h b/include/hip/hcc_detail/driver_types.h index 1941f44617..ae8e8b1757 100644 --- a/include/hip/hcc_detail/driver_types.h +++ b/include/hip/hcc_detail/driver_types.h @@ -135,6 +135,47 @@ typedef enum hipResourceType { hipResourceTypePitch2D = 0x03 }hipResourceType; +typedef enum HIPresourcetype_enum { + HIP_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resoure */ + HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */ + HIP_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */ + HIP_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */ +} HIPresourcetype; + +/** + * hip address modes + */ +typedef enum HIPaddress_mode_enum { + HIP_TR_ADDRESS_MODE_WRAP = 0, + HIP_TR_ADDRESS_MODE_CLAMP = 1, + HIP_TR_ADDRESS_MODE_MIRROR = 2, + HIP_TR_ADDRESS_MODE_BORDER = 3 +} HIPaddress_mode; + +/** + * hip filter modes + */ +typedef enum HIPfilter_mode_enum { + HIP_TR_FILTER_MODE_POINT = 0, + HIP_TR_FILTER_MODE_LINEAR = 1 +} HIPfilter_mode; + +/** + * Texture descriptor + */ +typedef struct HIP_TEXTURE_DESC_st { + HIPaddress_mode addressMode[3]; /**< Address modes */ + HIPfilter_mode filterMode; /**< Filter mode */ + unsigned int flags; /**< Flags */ + unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */ + HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */ + float mipmapLevelBias; /**< Mipmap level bias */ + float minMipmapLevelClamp; /**< Mipmap minimum level clamp */ + float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */ + float borderColor[4]; /**< Border Color */ + int reserved[12]; +} HIP_TEXTURE_DESC; + /** * hip texture resource view formats */ @@ -176,6 +217,45 @@ typedef enum 
hipResourceViewFormat { hipResViewFormatUnsignedBlockCompressed7 = 0x22 }hipResourceViewFormat; +typedef enum HIPresourceViewFormat_enum +{ + HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */ + HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */ + HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 
channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */ + HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */ + HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */ + HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */ + HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */ +} HIPresourceViewFormat; + /** * HIP resource descriptor */ @@ -204,6 +284,39 @@ typedef struct hipResourceDesc { } res; }hipResourceDesc; +typedef struct HIP_RESOURCE_DESC_st +{ + HIPresourcetype resType; /**< Resource type */ + + union { + struct { + hipArray_t hArray; /**< HIP array */ + } array; + struct { + hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */ + } mipmap; + struct { + hipDeviceptr_t devPtr; /**< Device pointer */ + hipArray_Format format; /**< Array format */ + unsigned int numChannels; /**< Channels per array element */ + size_t sizeInBytes; /**< Size in bytes */ + } linear; + struct { + hipDeviceptr_t devPtr; /**< Device pointer */ + hipArray_Format format; /**< Array format */ + unsigned int numChannels; /**< Channels per array element */ + size_t width; /**< Width of the array in elements */ + size_t height; /**< Height of the array in elements */ + size_t pitchInBytes; /**< Pitch between two rows in bytes */ + } pitch2D; + struct { + int reserved[32]; + } reserved; + } res; + + 
unsigned int flags; /**< Flags (must be zero) */ +} HIP_RESOURCE_DESC; + /** * hip resource view descriptor */ @@ -218,6 +331,22 @@ struct hipResourceViewDesc { unsigned int lastLayer; }; +/** + * Resource view descriptor + */ +typedef struct HIP_RESOURCE_VIEW_DESC_st +{ + HIPresourceViewFormat format; /**< Resource view format */ + size_t width; /**< Width of the resource view */ + size_t height; /**< Height of the resource view */ + size_t depth; /**< Depth of the resource view */ + unsigned int firstMipmapLevel; /**< First defined mipmap level */ + unsigned int lastMipmapLevel; /**< Last defined mipmap level */ + unsigned int firstLayer; /**< First layer index */ + unsigned int lastLayer; /**< Last layer index */ + unsigned int reserved[16]; +} HIP_RESOURCE_VIEW_DESC; + /** * Memory copy types * @@ -263,26 +392,29 @@ typedef struct hipMemcpy3DParms { } hipMemcpy3DParms; typedef struct HIP_MEMCPY3D { - size_t Depth; - size_t Height; - size_t WidthInBytes; - hipDeviceptr_t dstDevice; - size_t dstHeight; - void* dstHost; - size_t dstLOD; - hipMemoryType dstMemoryType; - size_t dstPitch; - size_t dstXInBytes; - size_t dstY; - size_t dstZ; - void* reserved0; - void* reserved1; - hipDeviceptr_t srcDevice; - size_t srcHeight; - const void* srcHost; - size_t srcLOD; - hipMemoryType srcMemoryType; - size_t srcPitch; + unsigned int srcXInBytes; + unsigned int srcY; + unsigned int srcZ; + unsigned int srcLOD; + hipMemoryType srcMemoryType; + const void* srcHost; + hipDeviceptr_t srcDevice; + hipArray_t srcArray; + unsigned int srcPitch; + unsigned int srcHeight; + unsigned int dstXInBytes; + unsigned int dstY; + unsigned int dstZ; + unsigned int dstLOD; + hipMemoryType dstMemoryType; + void* dstHost; + hipDeviceptr_t dstDevice; + hipArray_t dstArray; + unsigned int dstPitch; + unsigned int dstHeight; + unsigned int WidthInBytes; + unsigned int Height; + unsigned int Depth; } HIP_MEMCPY3D; static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, 
diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index 8f07e48d46..efe6a60197 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -192,16 +192,6 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, stream, &config[0]); } -inline -__attribute__((visibility("hidden"))) -hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) { - hip_impl::hip_init(); - auto& ps = hip_impl::get_program_state(); - return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); - -} - template inline __attribute__((visibility("hidden"))) diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 77a7bba60d..6fa86e94b9 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -229,7 +229,7 @@ THE SOFTWARE. __host__ __device__ operator __half_raw() const { return __half_raw{data}; } __host__ __device__ - operator volatile __half_raw() const volatile + operator __half_raw() const volatile { return __half_raw{data}; } diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 582e0cdefa..28d3ae7051 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -108,9 +108,12 @@ extern int HIP_TRACE_API; #include #include #include -#include #if __HCC__ #include + #include +#else + #include + #include #endif // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) @@ -385,7 +388,7 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri #elif defined(__clang__) && defined(__HIP__) #define HIP_KERNEL_NAME(...) 
__VA_ARGS__ -#define HIP_SYMBOL(X) #X +#define HIP_SYMBOL(X) X typedef int hipLaunchParm; diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index b0d1c3570d..81e241e362 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -1482,18 +1482,18 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); -hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName); -hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); -hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); -hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); @@ -1933,6 +1933,15 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); */ hipError_t hipFreeArray(hipArray* array); +/** + * @brief Frees a mipmapped array on the device + * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return 
#hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); + /** * @brief Allocate an array on the device. * @@ -1947,6 +1956,39 @@ hipError_t hipFreeArray(hipArray* array); hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags); + +/** + * @brief Allocate a mipmapped array on the device + * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + */ +hipError_t hipMallocMipmappedArray( + hipMipmappedArray_t *mipmappedArray, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, + unsigned int numLevels, + unsigned int flags __dparm(0)); + +/** + * @brief Gets a mipmap level of a HIP mipmapped array + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array + * @param[in] level - Mipmap level + * + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipGetMipmappedArrayLevel( + hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level); + /** * @brief Copies data between host and device. * @@ -2159,6 +2201,31 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); */ hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] pCopy 3D memory copy parameters + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); + +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); + // doxygen end Memory /** * @} @@ -2957,17 +3024,6 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* hipFunction_t f, size_t dynSharedMemPerBlk, int blockSizeLimit, unsigned int flags); -/** - * @brief Returns occupancy for a device function. - * - * @param [out] numBlocks Returned occupancy - * @param [in] func Kernel function for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with - * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - */ -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); - /** * @brief Returns occupancy for a device function. * @@ -2979,6 +3035,29 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function. 
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); + /** * @brief Returns occupancy for a device function. * @@ -2992,18 +3071,20 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); /** - * @brief Returns occupancy for a device function. 
+ * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel * - * @param [out] numBlocks Returned occupancy - * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * @returns hipSuccess, hipErrorInvalidDevice, hipErrorInvalidValue */ -hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit); -#if __HIP_VDI__ && !defined(__HCC__) /** * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched * on respective streams before enqueuing any other work on the specified streams from any other threads @@ -3018,7 +3099,6 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags); -#endif // doxygen end Version Management /** @@ -3260,6 +3340,206 @@ hipError_t hipLaunchKernel(const void* function_address, size_t sharedMemBytes __dparm(0), hipStream_t stream __dparm(0)); +#if __HIP_VDI__ +hipError_t hipBindTexture( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size __dparm(UINT_MAX)); + 
+hipError_t hipBindTexture2D( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch); + +hipError_t hipBindTextureToArray( + const textureReference* tex, + hipArray_const_t array, + const hipChannelFormatDesc* desc); + +hipError_t hipBindTextureToMipmappedArray( + const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +hipError_t hipGetTextureAlignmentOffset( + size_t* offset, + const textureReference* texref); + +hipError_t hipGetTextureReference( + const textureReference** texref, + const void* symbol); + +hipError_t hipUnbindTexture(const textureReference* tex); + +hipError_t hipCreateTextureObject( + hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc); + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +hipError_t hipGetChannelDesc( + hipChannelFormatDesc* desc, + hipArray_const_t array); + +hipError_t hipGetTextureObjectResourceDesc( + hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectResourceViewDesc( + hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectTextureDesc( + hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); + +hipError_t hipTexRefGetAddress( + hipDeviceptr_t* dev_ptr, + const textureReference* texRef); + +hipError_t hipTexRefGetAddressMode( + hipTextureAddressMode* pam, + const textureReference* texRef, + int dim); + +hipError_t hipTexRefGetFilterMode( + hipTextureFilterMode* pfm, + const textureReference* texRef); + +hipError_t hipTexRefGetFlags( + unsigned int* pFlags, + const textureReference* texRef); + +hipError_t hipTexRefGetFormat( + hipArray_Format* pFormat, + int* pNumChannels, + const textureReference* texRef); + +hipError_t hipTexRefGetMaxAnisotropy( + int* 
pmaxAnsio, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapFilterMode( + hipTextureFilterMode* pfm, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapLevelBias( + float* pbias, + const textureReference* texRef); + +hipError_t hipTexRefGetMipmapLevelClamp( + float* pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef); + +hipError_t hipTexRefGetMipMappedArray( + hipMipmappedArray_t* pArray, + const textureReference* texRef); + +hipError_t hipTexRefSetAddress( + size_t* ByteOffset, + textureReference* texRef, + hipDeviceptr_t dptr, + size_t bytes); + +hipError_t hipTexRefSetAddress2D( + textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch); + +hipError_t hipTexRefSetAddressMode( + textureReference* texRef, + int dim, + hipTextureAddressMode am); + +hipError_t hipTexRefSetArray( + textureReference* tex, + hipArray_const_t array, + unsigned int flags); + +hipError_t hipTexRefSetBorderColor( + textureReference* texRef, + float* pBorderColor); + +hipError_t hipTexRefSetFilterMode( + textureReference* texRef, + hipTextureFilterMode fm); + +hipError_t hipTexRefSetFlags( + textureReference* texRef, + unsigned int Flags); + +hipError_t hipTexRefSetFormat( + textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents); + +hipError_t hipTexRefSetMaxAnisotropy( + textureReference* texRef, + unsigned int maxAniso); + +hipError_t hipTexRefSetMipmapFilterMode( + textureReference* texRef, + hipTextureFilterMode fm); + +hipError_t hipTexRefSetMipmapLevelBias( + textureReference* texRef, + float bias); + +hipError_t hipTexRefSetMipmapLevelClamp( + textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp); + +hipError_t hipTexRefSetMipmappedArray( + textureReference* texRef, + hipMipmappedArray* mipmappedArray, + unsigned int Flags); + +hipError_t hipMipmappedArrayCreate( + hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* 
pMipmappedArrayDesc, + unsigned int numMipmapLevels); + +hipError_t hipMipmappedArrayDestroy( + hipMipmappedArray_t hMipmappedArray); + +hipError_t hipMipmappedArrayGetLevel( + hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level); + +hipError_t hipTexObjectCreate( + hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); + +hipError_t hipTexObjectDestroy( + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetResourceDesc( + HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetResourceViewDesc( + HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); + +hipError_t hipTexObjectGetTextureDesc( + HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject); +#endif + /** * @} */ @@ -3269,6 +3549,60 @@ hipError_t hipLaunchKernel(const void* function_address, } /* extern "c" */ #endif +#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} + +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} +#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) + +#if defined(__cplusplus) && !defined(__HCC__) + +template +hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) { + return ::hipGetSymbolAddress(devPtr, (const void *)&symbol); +} + +template 
+hipError_t hipGetSymbolSize(size_t* size, const T &symbol) { + return ::hipGetSymbolSize(size, (const void *)&symbol); +} + +template +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} + +template +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} + +template +hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} + +template +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} + +#endif + #if USE_PROF_API #include #endif @@ -3307,12 +3641,16 @@ inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( class TlsData; +#if !__HIP_VDI__ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size = UINT_MAX); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex); +#endif /* * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture @@ -3329,11 +3667,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode re * @param[in] size - Size of the memory area pointed to by 
devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ +#if !__HIP_VDI__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex); } +#endif /* * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture @@ -3349,81 +3689,114 @@ hipError_t hipBindTexture(size_t* offset, struct texture& tex, * @param[in] size - Size of the memory area pointed to by devPtr * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ +#if !__HIP_VDI__ template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); } +#endif // C API +#if !__HIP_VDI__ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t width, size_t height, textureReference* tex, size_t pitch); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, size_t width, size_t height, size_t pitch) { return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height, &tex); } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTexture2D(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t width, size_t height, size_t pitch) { return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex); } +#endif // C API +#if !__HIP_VDI__ 
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, const hipChannelFormatDesc* desc); +#endif +#if !__HIP_VDI__ hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex); } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, const struct hipChannelFormatDesc& desc) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex); } +#endif +#if !__HIP_VDI__ template inline static hipError_t hipBindTextureToArray(struct texture *tex, hipArray_const_t array, const struct hipChannelFormatDesc* desc) { return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex); } +#endif // C API +#if !__HIP_VDI__ hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc* desc); +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray) { return hipSuccess; } +#endif +#if !__HIP_VDI__ template hipError_t hipBindTextureToMipmappedArray(const texture& tex, hipMipmappedArray_const_t mipmappedArray, const hipChannelFormatDesc& desc) { return hipSuccess; } +#endif #if __HIP_VDI__ && !defined(__HCC__) + +template +inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { +return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit); +} + template inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, void** 
kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { @@ -3453,15 +3826,22 @@ inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchPara * * @return #hipSuccess **/ +#if !__HIP_VDI__ hipError_t hipUnbindTexture(const textureReference* tex); +#endif +#if !__HIP_VDI__ extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); +#endif +#if !__HIP_VDI__ template hipError_t hipUnbindTexture(struct texture& tex) { return ihipUnbindTextureImpl(tex.textureObject); } +#endif +#if !__HIP_VDI__ hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); @@ -3499,11 +3879,110 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex); hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t devPtr, size_t pitch); +#endif hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +#if __HIP_VDI__ +template +static inline hipError_t hipBindTexture( + size_t *offset, + const struct texture &tex, + const void *devPtr, + size_t size = UINT_MAX) +{ + return hipBindTexture(offset, tex, devPtr, tex.channelDesc, size); +} + +template +static inline hipError_t hipBindTexture( + size_t *offset, + const struct texture &tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t size = UINT_MAX) +{ + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} + +template +static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} + +template 
+static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} + +template +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array) +{ + struct hipChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(tex, array, desc) : err; +} + +template +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array, + const struct hipChannelFormatDesc &desc) +{ + return hipBindTextureToArray(&tex, array, &desc); +} + +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray) +{ + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? 
hipBindTextureToMipmappedArray(tex, mipmappedArray, desc) : err; +} + +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray, + const struct hipChannelFormatDesc &desc) +{ + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} + +template +static inline hipError_t hipUnbindTexture( + const struct texture &tex) +{ + return hipUnbindTexture(&tex); +} +#endif + // doxygen end Texture /** * @} diff --git a/include/hip/hcc_detail/hip_texture_types.h b/include/hip/hcc_detail/hip_texture_types.h index fcd6d69dbe..e92babfd5a 100644 --- a/include/hip/hcc_detail/hip_texture_types.h +++ b/include/hip/hcc_detail/hip_texture_types.h @@ -57,25 +57,27 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint, enum hipTextureAddressMode aMode = hipAddressModeClamp) { normalized = norm; - readMode = hipReadModeNormalizedFloat; + readMode = mode; filterMode = fMode; addressMode[0] = aMode; addressMode[1] = aMode; addressMode[2] = aMode; channelDesc = hipCreateChannelDesc(); sRGB = 0; + textureObject = nullptr; } texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, struct hipChannelFormatDesc desc) { normalized = norm; - readMode = hipReadModeNormalizedFloat; + readMode = mode; filterMode = fMode; addressMode[0] = aMode; addressMode[1] = aMode; addressMode[2] = aMode; channelDesc = desc; sRGB = 0; + textureObject = nullptr; } }; diff --git a/include/hip/hcc_detail/hiprtc.h b/include/hip/hcc_detail/hiprtc.h index ec9c85716a..fecea75340 100644 --- a/include/hip/hcc_detail/hiprtc.h +++ b/include/hip/hcc_detail/hiprtc.h @@ -28,7 +28,9 @@ extern "C" { #include +#if !defined(_WIN32) #pragma GCC visibility push (default) +#endif enum hiprtcResult { HIPRTC_SUCCESS = 0, @@ -81,7 +83,9 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult 
hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#if !defined(_WIN32) #pragma GCC visibility pop +#endif #ifdef __cplusplus } diff --git a/include/hip/hcc_detail/ockl_image.h b/include/hip/hcc_detail/ockl_image.h new file mode 100644 index 0000000000..b32b23fda0 --- /dev/null +++ b/include/hip/hcc_detail/ockl_image.h @@ -0,0 +1,135 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +extern "C" { + +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) + +__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l); + +__device__ void __ockl_image_store_1D(unsigned int 
ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c); + +__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, 
unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, 
float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +}; \ No newline at end of file diff --git a/include/hip/hcc_detail/texture_fetch_functions.h b/include/hip/hcc_detail/texture_fetch_functions.h new file mode 100644 index 0000000000..03c1780030 --- /dev/null +++ b/include/hip/hcc_detail/texture_fetch_functions.h @@ -0,0 +1,386 @@ +/* +Copyright (c) 2015 - 
present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#include + +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +struct __hip_is_tex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_tex_channel_type> +{ + static constexpr bool value = + __hip_is_tex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template +struct __hip_is_tex_normalized_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_tex_normalized_channel_type> +{ + static constexpr bool value = + __hip_is_tex_normalized_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct __hip_tex_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex_ret_t = typename __hip_tex_ret::type; + +template +struct __hip_tex_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = T; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template +struct __hip_tex_ret< + T, + hipReadModeNormalizedFloat, + typename 
std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1Dfetch(texture t, int x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_load_1Db(i, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1D(texture t, float x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2D(texture t, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayered(texture t, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayered(texture t, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3D(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemap(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + 
auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLod(texture t, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLod(texture t, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredLod(texture t, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredLod(texture t, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayered(texture t, float x, float y, float z, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = 
__ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredLod(texture t, float x, float y, float z, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredGrad(texture t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DGrad(texture t, float x, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DGrad(texture t, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredGrad(texture t, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredGrad(texture t, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct 
__hip_tex2dgather_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret::type; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex2dgather_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float4; +}; + +template +static __forceinline__ __device__ __hip_tex2dgather_ret_t tex2Dgather(texture t, float x, float y, int comp=0) +{ + TEXTURE_PARAMETERS_INIT; + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + } + return {}; +} + +#endif diff --git a/include/hip/hcc_detail/texture_indirect_functions.h b/include/hip/hcc_detail/texture_indirect_functions.h new file mode 100644 index 0000000000..2fe33f3ede --- /dev/null +++ b/include/hip/hcc_detail/texture_indirect_functions.h @@ -0,0 +1,501 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#include + +#define TEXTURE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +struct __hip_is_itex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_itex_channel_type> +{ + static constexpr bool value = + __hip_is_itex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_load_1Db(i, x); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x) +{ + *ptr = tex1Dfetch(textureObject, x); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1D(hipTextureObject_t textureObject, float x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x) +{ + *ptr = tex1D(textureObject, x); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2D(hipTextureObject_t 
textureObject, float x, float y) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y) +{ + *ptr = tex2D(textureObject, x, y); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = tex3D(textureObject, x, y, z); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer) +{ + *ptr = tex1DLayered(textureObject, x, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename 
std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer) +{ + *ptr = tex2DLayered<T>(textureObject, x, y, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = texCubemap(textureObject, x, y, z); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + *ptr = texCubemapLayered(textureObject, x, y, z, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + switch (comp) { // comp: 1 -> g, 2 -> b, 3 -> a, anything else (incl. the default 0) -> r; same mapping as the texture-reference tex2Dgather + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; 
+ } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + }; + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + *ptr = tex2Dgather<T>(textureObject, x, y, comp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level) +{ + *ptr = tex1DLod(textureObject, x, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level) +{ + *ptr = tex2DLod(textureObject, x, y, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp 
= __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = tex3DLod(textureObject, x, y, z, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level) +{ + *ptr = tex1DLayeredLod(textureObject, x, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + *ptr = tex2DLayeredLod(textureObject, x, y, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, 
y, z, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = texCubemapLod(textureObject, x, y, z, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + *ptr = texCubemapLayeredLod(textureObject, x, y, z, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> 
+static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + *ptr = tex1DGrad(textureObject, x, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DGrad(textureObject, x, y, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = tex3DGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + 
typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + *ptr = tex1DLayeredGrad(textureObject, x, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DLayeredGrad(textureObject, x, y, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapLayeredGrad(textureObject, x, y, z, layer, dPdx, dPdy); +} + +#endif diff --git a/include/hip/hip_ext.h b/include/hip/hip_ext.h index a618462d6e..9b54f7fa57 100644 --- a/include/hip/hip_ext.h +++ b/include/hip/hip_ext.h @@ -109,7 +109,9 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t stopEvent = nullptr) __attribute__((deprecated("use hipExtModuleLaunchKernel instead"))); -#if !__HIP_VDI__ && defined(__cplusplus) +//#if !__HIP_VDI__ && defined(__cplusplus) +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) +//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp" namespace hip_impl { inline diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 3d76e92632..b0974aeef6 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -336,7 +336,7 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeTexturePitchAlignment, /// /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + + +ROCMDIR=@ROCM_PATH@ +ROCMLIBDIR=$ROCMDIR/lib +HIPDIR=$ROCMDIR/hip +HIPLIBDIR=$ROCMDIR/hip/lib + +# Soft-link to library files +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +mkdir -p $ROCMLIBDIR +mkdir -p $ROCMLIBDIR/cmake +pushd $ROCMLIBDIR +for f in $HIPLIBFILES +do + ln -s -r -f $HIPLIBDIR/$f $(basename $f) +done +# Make the hip cmake directory link. 
+pushd cmake +ln -s -r -f $HIPLIBDIR/cmake/hip hip +popd +popd diff --git a/packaging/hip-vdi.prerm b/packaging/hip-vdi.prerm new file mode 100755 index 0000000000..bf2be36523 --- /dev/null +++ b/packaging/hip-vdi.prerm @@ -0,0 +1,31 @@ +#!/bin/bash + +pushd () { + command pushd "$@" > /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + +ROCMDIR=@ROCM_PATH@ +ROCMLIBDIR=$ROCMDIR/lib +HIPDIR=$ROCMDIR/hip +HIPLIBDIR=$ROCMDIR/hip/lib +([ ! -d $ROCMDIR ] || [ ! -d $HIPDIR ]) && exit 0 +([ ! -d $ROCMLIBDIR ] || [ ! -d $HIPLIBDIR ]) && exit 0 + +# Remove soft-links to libraries +HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$]) +pushd $ROCMLIBDIR +for f in $HIPLIBFILES; do + [ -e $f ] || continue + rm $(basename $f) +done +pushd cmake +unlink hip +popd +rmdir --ignore-fail-on-non-empty cmake +popd +rmdir --ignore-fail-on-non-empty $ROCMLIBDIR + diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt new file mode 100644 index 0000000000..2bbe4331d0 --- /dev/null +++ b/packaging/hip-vdi.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 2.8.3) +project(hip_vdi) + +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libhip_hcc.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) + +install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) + +############################# +# Packaging steps +############################# +set(CPACK_SET_DESTDIR TRUE) +set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") + +## cmake generated target files contains IMPORTED_LOCATION_RELEASE etc. which +## is installation path when building the project, which may be different from +## the intallation path for packaging. 
These paths have to be replaced by +## the package installation path, otherwise apps using pkg-config will fail. +file(GLOB _target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets*.cmake) +foreach(_target_file ${_target_files}) + execute_process(COMMAND sed -i s:@CMAKE_INSTALL_PREFIX@:${CPACK_INSTALL_PREFIX}:g ${_target_file}) +endforeach() +install(FILES ${_target_files} DESTINATION lib/cmake/hip) + +set(CPACK_PACKAGE_NAME "hip-vdi") +set(HCC_PACKAGE_NAME "vdi") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]") +set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") +set(CPACK_PACKAGE_CONTACT "Maneesh Gupta ") +set(CPACK_PACKAGE_VERSION @HIP_VERSION_MAJOR@.@HIP_VERSION_MINOR@.@HIP_VERSION_PATCH@) +set(CPACK_PACKAGE_VERSION_MAJOR @HIP_VERSION_MAJOR@) +set(CPACK_PACKAGE_VERSION_MINOR @HIP_VERSION_MINOR@) +set(CPACK_PACKAGE_VERSION_PATCH @HIP_VERSION_PATCH@) +set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) +set(CPACK_GENERATOR "TGZ;DEB;RPM") +set(CPACK_BINARY_DEB "ON") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi") +set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_BINARY_RPM "ON") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") +set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") +set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") +set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") +string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) +set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = 
${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu") +set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi") +set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") +set(CPACK_SOURCE_GENERATOR "TGZ") +include(CPack) diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index 625d8cd742..b528b0c75d 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -109,7 +109,7 @@ int main() { /***********************************************************************************/ //Timing directly the dispatch -#ifdef __HIP_PLATFORM_HCC__ +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0); hipEventSynchronize(stop); diff --git a/samples/2_Cookbook/13_occupancy/occupancy.cpp b/samples/2_Cookbook/13_occupancy/occupancy.cpp index 01fa7aafed..e772e82b1d 100644 --- a/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -44,6 +44,8 @@ void multiplyCPU(float* C, float* A, float* B, int N){ } } +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) + void launchKernel(float* C, float* A, float* B, bool manual){ hipDeviceProp_t devProp; @@ -93,8 +95,10 @@ void launchKernel(float* C, float* A, float* B, bool manual){ std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; } } +#endif int main() { +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) float *A, *B, *C0, *C1, *cpuC; float *Ad, *Bd, *C0d, *C1d; int errors=0; @@ -173,4 +177,8 @@ int main() { free(C0); free(C1); 
free(cpuC); +#else + std::cout <<"hipOccupancyMaxPotentialBlockSize template not support for Clang compiler"< + +// hip header file +#include "hip/hip_runtime.h" +#include "hip/hip_profile.h" + +#define WIDTH 1024 + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +#define ITERATIONS 10 + +// Cmdline parms to control start and stop triggers +int startTriggerIteration = -1; +int stopTriggerIteration = -1; + +// Device (Kernel) function, it must be void +__global__ void matrixTranspose(float* out, float* in, const int width) { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + out[y * width + x] = in[x * width + y]; +} + +// CPU implementation of matrix transpose +void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + + +// Use a separate function to demonstrate how to use function name as part of scoped marker: +void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) { + // __func__ is a standard C++ macro which expands to the name of the function, in this case + // "runGPU" + HIP_SCOPED_MARKER(__func__, "MyGroup"); + + for (int i = 0; i < ITERATIONS; i++) { + if (i == startTriggerIteration) { + hipProfilerStart(); + } + if (i == stopTriggerIteration) { + hipProfilerStop(); + } + + float eventMs = 0.0f; + + hipEvent_t start, stop; + hipEventCreate(&start); + hipEventCreate(&stop); + + + // Record the start event + hipEventRecord(start, NULL); + + // Memory transfer from host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + + 
printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); + + // Record the start event + hipEventRecord(start, NULL); + + // Lauching kernel from host + hipLaunchKernelGGL(matrixTranspose, + dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, + gpuMatrix, WIDTH); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + hipEventElapsedTime(&eventMs, start, stop); + + printf("kernel Execution time = %6.3fms\n", eventMs); + + // Record the start event + hipEventRecord(start, NULL); + + // Memory transfer from device to host + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + + // Record the stop event + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + + printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); + } +}; + + +int main(int argc, char* argv[]) { + if (argc >= 2) { + startTriggerIteration = atoi(argv[1]); + printf("info : will start tracing at iteration:%d\n", startTriggerIteration); + } + if (argc >= 3) { + stopTriggerIteration = atoi(argv[2]); + printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration); + } + + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; + + float* gpuMatrix; + float* gpuTransposeMatrix; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + + std::cout << "Device name " << devProp.name << std::endl; + + { + // Show example of how to create a "scoped marker". + // The scoped marker records the time spent inside the { scope } of the marker - the begin + // timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE + // exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. 
This + // marker captures the time spent in setup including host allocation, initialization, and + // device memory allocation. + HIP_SCOPED_MARKER("Setup", "MyGroup"); + + + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (int i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } + + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // FYI, the scoped-marker will be destroyed here when the scope exits, and will record its + // "end" timestamp. + } + + runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix); + + + // show how to use explicit begin/end markers: + // We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group: + // The region will stop when HIP_END_MARKER is called + // This is another way to mark begin/end - as an alternative to scoped markers. 
+ HIP_BEGIN_MARKER("Check&TearDown", "MyGroup"); + + int errors = 0; + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + double eps = 1.0E-6; + for (int i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + + // This ends the last marker started in this thread, in this case "Check&TearDown" + HIP_END_MARKER(); + + return errors; +} diff --git a/samples/2_Cookbook/2_Profiler/Readme.md b/samples/2_Cookbook/2_Profiler/Readme.md new file mode 100644 index 0000000000..8b32beb348 --- /dev/null +++ b/samples/2_Cookbook/2_Profiler/Readme.md @@ -0,0 +1,47 @@ +## Using hipEvents to measure performance ### + +This tutorial is follow-up of the previous two tutorial where we learn how to write our first hip program, in which we compute Matrix Transpose and in second one, we added feature to measure time taken for memory transfer and kernel execution. In this tutorial, we'll explain how to use the codexl/rocm-profiler for hip timeline tracing. Also, we will augment the source code with additional markers so we can see the high-level application flow alongside the information that CodeXL automatically collects. + + +## Introduction: + +CodeXL and rocm-profiler are the tool used for profiling the application, which is of prominent use in optimizing the application by means of finding the memory bottlenecks and etc. + +## Requirement: +[CodeXL Installation](http://gpuopen.com/compute-product/codexl/) + +## prerequiste knowledge: + +Programmers familiar with CUDA, OpenCL will be able to quickly learn and start coding with the HIP API. 
In case you are not, don't worry. You have chosen to start with the best one. We'll be explaining everything assuming you are completely new to GPGPU programming. + +## Simple Matrix Transpose + +We will be using the Simple Matrix Transpose source code from the previous tutorial as it is. + +## Using CodeXL markers for HIP Functions + +HIP can generate markers at function begin/end which are displayed on the CodeXL timeline view. To do this, you need to install ROCm-Profiler and enable HIP to generate the markers: + +1. Install ROCm-Profiler. Installing HIP from the ROCm pre-built packages installs the ROCm-Profiler as well. Alternatively, you can build ROCm-Profiler using the instructions given below. + + +2. Run with the profiler enabled to generate an ATP file. +(These steps are also captured in the Makefile) +The HIP_PROFILE_API environment variable enables display of the HIP APIs on the CodeXL timeline view. +`/opt/rocm/bin/rocm-profiler -o <outputATPFileName> -A -e HIP_PROFILE_API=1 <applicationName> <applicationArguments>` + +## Using HIP_TRACE_API + +You can also print the HIP function strings to stderr using the HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided by the HIP_DB switch. For example: +`HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp` +Note that this trace mode uses colors. "less -r" can handle raw control characters and will display the debug output in proper colors.
+ +## More Info: +- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_faq.md) +- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md) +- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) +- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) +- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) +- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/src/h2f.cpp b/src/h2f.cpp index a8c60e7c48..84d067166f 100644 --- a/src/h2f.cpp +++ b/src/h2f.cpp @@ -57,12 +57,14 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept { // On machines without fp16 instructions, clang lowers llvm.convert.from.fp16 // to call of this function. -extern "C" float __gnu_h2f_ieee(unsigned short h){ +extern "C" __attribute__((visibility("default"))) +float __gnu_h2f_ieee(unsigned short h){ return __convert_half_to_float((std::uint32_t) h); } // On machines without fp16 instructions, clang lowers llvm.convert.to.fp16 // to call of this function. -extern "C" unsigned short __gnu_f2h_ieee(float f){ +extern "C" __attribute__((visibility("default"))) +unsigned short __gnu_f2h_ieee(float f){ return (unsigned short)__convert_float_to_half(f); } diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 93f5a82a2f..85aa0ad810 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. 
#include "hip_hcc_internal.h" #include "hip_fatbin.h" #include "trace_helper.h" +#include "program_state.inl" #ifdef __GNUC__ #pragma GCC visibility push (default) @@ -94,8 +95,10 @@ __hipRegisterFatBinary(const void* data) module->executable, agent); if (module->executable.handle) { - modules->at(deviceId) = module; - tprintf(DB_FB, "Loaded code object for %s\n", name); + hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs); + modules->at(deviceId) = module; + + tprintf(DB_FB, "Loaded code object for %s, args size=%ld\n", name, module->kernargs.size()); } else { fprintf(stderr, "Failed to load code object for %s\n", name); abort(); @@ -157,16 +160,215 @@ extern "C" void __hipRegisterFunction( g_functions.insert(std::make_pair(hostFunction, std::move(functions))); } +static inline const char* hsa_strerror(hsa_status_t status) { + const char* str = nullptr; + if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) { + return str; + } + return "Unknown error"; +} + +struct RegisteredVar { +public: + RegisteredVar(): size_(0), devicePtr_(nullptr) {} + ~RegisteredVar() {} + + static inline const char* hsa_strerror(hsa_status_t status) { + const char* str = nullptr; + if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) { + return str; + } + return "Unknown error"; +} + +hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. 
+}; + +struct DeviceVar { + void* shadowVptr; + std::string hostVar; + size_t size; + std::vector* modules; + std::vector rvars; + bool dyn_undef; +}; + +std::unordered_multimap g_vars; + +//The logic follows PlatformState::getGlobalVar in VDI RT +static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) { + DeviceVar* dvar = nullptr; + if (hmod != nullptr) { + // If module is provided, then get the var only from that module + auto var_range = g_vars.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + if ((*it->second.modules)[deviceId] == hmod) { + dvar = &(it->second); + break; + } + } + } else { + // If var count is < 2, return the var + if (g_vars.count(hostVar) < 2) { + auto it = g_vars.find(hostVar); + dvar = ((it == g_vars.end()) ? nullptr : &(it->second)); + } else { + // If var count is > 2, return the original var, + // if original var count != 1, return g_vars.end()/Invalid + size_t orig_global_count = 0; + auto var_range = g_vars.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + // when dyn_undef is set, it is a shadow var + if (it->second.dyn_undef == false) { + ++orig_global_count; + dvar = &(it->second); + } + } + dvar = ((orig_global_count == 1) ? 
dvar : nullptr); + } + } + return dvar; +} + +hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr, + const char* hostVar, hipModule_t hmod) { + GET_TLS(); + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) return hipErrorInvalidValue; + + auto device = ctx->getDevice(); + + if (!device) return hipErrorInvalidValue; + + ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId); + + if (!currentDevice) return hipErrorInvalidValue; + + int deviceId = device->_deviceId; + + DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod); + if (dvar == nullptr) return hipErrorInvalidValue; + + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) return hipErrorInvalidValue; + + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); + return hipSuccess; +} + +static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_agent_t& hasAgent, + const char* global_name, void** device_pptr, size_t* bytes) { + hsa_status_t status = HSA_STATUS_SUCCESS; + hsa_symbol_kind_t sym_type; + hsa_executable_symbol_t global_symbol; + std::string buildLog; + + /* Find HSA Symbol by name */ + status = hsa_executable_get_symbol_by_name(hsaExecutable, global_name, &hasAgent, + &global_symbol); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol by Name: "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Find HSA Symbol Type */ + status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, + &sym_type); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol Type : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Make sure symbol type is VARIABLE */ + if (sym_type != HSA_SYMBOL_KIND_VARIABLE) { + buildLog += "Error: Symbol is not of type VARIABLE : "; + buildLog 
+= hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Retrieve the size of the variable */ + status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, bytes); + + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to retrieve the Symbol Size : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } + + /* Find HSA Symbol Address */ + status = hsa_executable_symbol_get_info(global_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, device_pptr); + if (status != HSA_STATUS_SUCCESS) { + buildLog += "Error: Failed to find the Symbol Address : "; + buildLog += hsa_strerror(status); + tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str()); + return false; + } else { + tprintf(DB_FB, "createGlobalVarObj: var %s : device=%p, size=%zu\n", global_name, *device_pptr, *bytes); + } + + return true; +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. +// The basic logic is taken from VDI RT, but there is much difference. 
extern "C" void __hipRegisterVar( - std::vector* modules, - char* hostVar, - char* deviceVar, - const char* deviceName, - int ext, - int size, - int constant, - int global) + std::vector* modules, // The device modules containing code object + char* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + const char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + int size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 { + HIP_INIT_API(__hipRegisterVar, modules, var, hostVar, deviceVar, ext, size, constant, global); + + DeviceVar dvar{var, std::string{ hostVar }, static_cast(size), modules, + std::vector{ g_deviceCnt }, false }; + + for (int deviceId = 0; deviceId < g_deviceCnt; deviceId++) { + auto device = ihipGetDevice(deviceId); + if(!device) { + continue; + } + hsa_executable_t& executable = (*modules)[deviceId]->executable; + hsa_agent_t& agent = g_allAgents[deviceId + 1]; + size_t bytes = 0; + hipDeviceptr_t devicePtr = nullptr; + + bool success = createGlobalVarObj(executable, agent, hostVar, &devicePtr, &bytes); + if(!success) { + return; + } + dvar.rvars[deviceId].devicePtr_ = devicePtr; + dvar.rvars[deviceId].size_ = bytes; + + hc::AmPointerInfo ptrInfo(nullptr, devicePtr, devicePtr, bytes, device->_acc, true, false); + hc::am_memtracker_add(devicePtr, ptrInfo); + + #if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(devicePtr, device->_deviceId, 0u, ihipGetTlsDefaultCtx()); + #else + hc::am_memtracker_update(devicePtr, device->_deviceId, 0u); + #endif + } + g_vars.insert(std::make_pair(std::string(hostVar), dvar)); } extern "C" void __hipUnregisterFatBinary(std::vector* modules) @@ -226,6 +428,41 @@ extern "C" hipError_t __hipPopCallConfiguration( return hipSuccess; } +int getCurrentDeviceId() +{ + GET_TLS(); + + int deviceId = 0; + auto ctx = ihipGetTlsDefaultCtx(); + + if(!ctx) return deviceId; + + 
LockedAccessor_CtxCrit_t crit(ctx->criticalData()); + + if(crit->_execStack.size() != 0) + { + auto &exec = crit->_execStack.top(); + + if (exec._hStream) { + deviceId = exec._hStream->getDevice()->_deviceId; + } else if (ctx->getDevice()) { + deviceId = ctx->getDevice()->_deviceId; + } + } else if (ctx->getDevice()) { + deviceId = ctx->getDevice()->_deviceId; + } + return deviceId; +} + +hipFunction_t ihipGetDeviceFunction(const void *hostFunction) +{ + int deviceId = getCurrentDeviceId(); + auto it = g_functions.find(hostFunction); + if (it == g_functions.end() || !it->second[deviceId]) { + return nullptr; + } + return it->second[deviceId]; +} hipError_t hipSetupArgument( const void *arg, diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index b1777955aa..803abe28e2 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -33,7 +33,7 @@ THE SOFTWARE. #include "hip_prof_api.h" #include "hip_util.h" #include "env.h" - +#include #if (__hcc_workweek__ < 16354) #error("This version of HIP requires a newer version of HCC."); @@ -1009,6 +1009,18 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); +/** + * @brief Copies the memory address and size of symbol @p symbolName + * + * @param[in] symbolName - Symbol on device + * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol + * @param[out] size - Pointer to the size of the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound, #hipErrorInvalidValue + * + */ +hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr, const char* hostVar, + hipModule_t hmod = nullptr); + // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) { os << "stream:"; @@ -1080,4 +1092,14 @@ static inline ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) { 
return tls->defaultCtx; } +/** + * @brief Get device function from host kernel function pointer + * Needed only for clang + HIP-HCC RT + * + * @param [in] hostFunction host kernel function pointer + * + * @returns hipFuntion_t, nullptr + */ +hipFunction_t ihipGetDeviceFunction(const void *hostFunction); + #endif diff --git a/src/hip_module.cpp b/src/hip_module.cpp index e98afa3294..0f608d9843 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -344,6 +344,8 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList if (kds[i] == nullptr) { return hipErrorInvalidValue; } + if (!kds[i]->_kernarg_layout.empty()) continue; + hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( @@ -397,6 +399,14 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return result; } +__attribute__((visibility("default"))) +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + auto& ps = hip_impl::get_program_state(); + return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); +} + void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) { if (f->_is_code_object_v3) { @@ -736,7 +746,6 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL mg_sync *mg_sync_ptr = 0; vector mg_info_ptr; - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault, true); if (result != hipSuccess) { return hipErrorInvalidValue; @@ -1091,7 +1100,12 @@ namespace hip_impl { hipError_t agent_globals::read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) { - return impl->read_agent_global_from_process(dptr, bytes, name); + hipError_t result = 
impl->read_agent_global_from_process(dptr, bytes, name); + if(result != hipSuccess) { + // For Clang Compiler + Hcc Rt + result = ihipGetGlobalVar(dptr, bytes, name); + } + return result; } } // Namespace hip_impl. @@ -1259,19 +1273,34 @@ hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t if (!*func) return hipErrorInvalidValue; std::string name_str(name); + std::string namekd_str(name_str + ".kd"); + bool kernel_by_namekd = false; + auto kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent); if (kernel.handle == 0u) { - name_str.append(".kd"); - kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent); + kernel_by_namekd = true; //Find kernel by namekd_str + kernel = find_kernel_by_name(hmod->executable, namekd_str.c_str(), agent); } if (kernel.handle == 0u) return hipErrorNotFound; + //For hipModuleLoad(), hmod->kernargs must contain an args with key + //name_str or namekd_str. + //For hipLaunchKernelGGL(), hmod->kernargs is empty, thus we need + //insert hmod->kernargs[name_str] + auto it = hmod->kernargs.find(name_str); //Look up args from the original name + if (it == hmod->kernargs.end()) { + it = hmod->kernargs.find(namekd_str); //Look up args from .kd name + } + // TODO: refactor the whole ihipThisThat, which is a mess and yields the // below, due to hipFunction_t being a pointer to ihipModuleSymbol_t. + func[0][0] = *static_cast( - Kernel_descriptor{kernel_object(kernel), name_str, hmod->kernargs[name_str]}); + Kernel_descriptor{kernel_object(kernel), + kernel_by_namekd ? namekd_str : name_str, + it != hmod->kernargs.end() ? 
it->second : hmod->kernargs[name_str]}); return hipSuccess; } diff --git a/src/hiprtc.cpp b/src/hiprtc.cpp index 4efdbad653..a11207f337 100644 --- a/src/hiprtc.cpp +++ b/src/hiprtc.cpp @@ -250,7 +250,7 @@ struct _hiprtcProgram { const auto it{find_if(reader.sections.begin(), reader.sections.end(), [](const section* x) { - return x->get_name() == ".kernel"; + return (x->get_name() == ".hip_fatbin") || (x->get_name() == ".kernel"); })}; if (it == reader.sections.end()) return false; @@ -513,7 +513,7 @@ extern "C" hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char* const auto src{p->writeTemporaryFiles(tmp.path())}; - vector args{hipcc, "-shared"}; + vector args{hipcc, "-fPIC -shared"}; if (n) args.insert(args.cend(), o, o + n); handleTarget(args); diff --git a/src/program_state.inl b/src/program_state.inl index 760dafea22..0314c7d4ed 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -19,6 +19,8 @@ #include #include #include "hc.hpp" +#include "hip_hcc_internal.h" +#include "trace_helper.h" #include @@ -734,6 +736,27 @@ public: != AMD_COMGR_STATUS_SUCCESS) return; + //Look up “.value_kind†to decide whether to ignore it + //See http://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata-mattr-code-object-v3 + amd_comgr_metadata_node_t arg_value_kind_md; + if (amd_comgr_metadata_lookup(arg_md, ".value_kind", &arg_value_kind_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + std::string arg_value_kind{ metadata_to_string(arg_value_kind_md) }; + + if (amd_comgr_destroy_metadata(arg_value_kind_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + if (arg_value_kind.find("hidden_") == 0) { + if (amd_comgr_destroy_metadata(arg_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + continue; //Ignore hidden arg + } + amd_comgr_metadata_node_t arg_size_md; if (amd_comgr_metadata_lookup(arg_md, ".size", &arg_size_md) != AMD_COMGR_STATUS_SUCCESS) @@ -937,14 +960,16 @@ public: auto it0 = get_functions(agent).find(function_address); - if (it0 == 
get_functions(agent).cend()) { - hip_throw(std::runtime_error{ + if (it0 != get_functions(agent).cend()) return it0->second; + + // For hip-clang compiler + Hcc RT + hipFunction_t f = ihipGetDeviceFunction((const void*)function_address); + if (f) return reinterpret_cast(*f); + + hip_throw(std::runtime_error{ "No device code available for function: " + std::string(name(function_address)) + ", for agent: " + name(agent)}); - } - - return it0->second; } const std::vector>& diff --git a/tests/hip_tests.txt b/tests/hip_tests.txt index f3ea49a0f9..67a4238520 100644 --- a/tests/hip_tests.txt +++ b/tests/hip_tests.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8.3) project(hip_tests) # Setup -set(HIP_PATH @CMAKE_INSTALL_PREFIX@) -set(ENV{HIP_PATH} ${HIP_PATH}) +#set(HIP_PATH @CMAKE_INSTALL_PREFIX@) +#set(ENV{HIP_PATH} ${HIP_PATH}) set(HIP_SRC_PATH @hip_SOURCE_DIR@) set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) include(${HIP_SRC_PATH}/tests/hit/HIT.cmake) diff --git a/tests/hit/HIT.cmake b/tests/hit/HIT.cmake index 828fbe2cc7..1c7f7ff464 100644 --- a/tests/hit/HIT.cmake +++ b/tests/hit/HIT.cmake @@ -109,11 +109,11 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _nvcc_opt elseif(_link_options_found) list(APPEND ${_link_options} ${arg}) elseif(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) elseif(_depends_found) list(APPEND ${_depends} ${arg}) else() @@ -160,11 +160,11 @@ macro(PARSE_CUSTOMBUILD_COMMAND _target _buildcmd _exclude_platforms _exclude_ru set(_depends_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + 
list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) elseif(_depends_found) list(APPEND ${_depends} ${arg}) else() @@ -203,11 +203,11 @@ macro(PARSE_TEST_COMMAND _target _arguments _exclude_platforms _exclude_runtime set(_exclude_compiler_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) else() list(APPEND ${_arguments} ${arg}) endif() @@ -246,11 +246,11 @@ macro(PARSE_TEST_NAMED_COMMAND _target _testname _arguments _exclude_platforms _ set(_exclude_compiler_found TRUE) else() if(_exclude_platforms_found) - set(${_exclude_platforms} ${arg}) + list(APPEND ${_exclude_platforms} ${arg}) elseif(_exclude_runtime_found) - set(${_exclude_runtime} ${arg}) + list(APPEND ${_exclude_runtime} ${arg}) elseif(_exclude_compiler_found) - set(${_exclude_compiler} ${arg}) + list(APPEND ${_exclude_compiler} ${arg}) else() list(APPEND ${_arguments} ${arg}) endif() @@ -297,13 +297,13 @@ macro(HIT_ADD_FILES _dir _label _parent) string(REGEX REPLACE " " ";" _cmd "${_cmd}") parse_build_command(_target _sources _hipcc_options _hcc_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_dir} ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) insert_into_map("_exclude" "${target}" TRUE) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) else() set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) @@ -336,13 +336,13 @@ macro(HIT_ADD_FILES _dir _label _parent) string(REGEX REPLACE " " ";" _cmd "${_cmd}") parse_custombuild_command(_target _buildcmd _exclude_platforms _exclude_runtime _exclude_compiler _depends ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) insert_into_map("_exclude" "${target}" TRUE) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) insert_into_map("_exclude" "${target}" TRUE) else() string(REGEX REPLACE ";" " " _buildcmd "${_buildcmd}") @@ -370,10 +370,10 @@ macro(HIT_ADD_FILES _dir _label _parent) parse_test_command(_target _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) read_from_map("_exclude" "${target}" _exclude_test_from_build) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() make_test(${_label}/${_target} ${_arguments}) @@ -392,10 +392,10 @@ macro(HIT_ADD_FILES _dir _label _parent) parse_test_named_command(_target _testname _arguments _exclude_platforms _exclude_runtime _exclude_compiler ${_cmd}) string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) read_from_map("_exclude" "${target}" _exclude_test_from_build) - if(_exclude_platforms STREQUAL "all" OR _exclude_platforms STREQUAL ${HIP_PLATFORM}) - elseif(NOT _exclude_runtime AND _exclude_compiler STREQUAL ${HIP_COMPILER}) - elseif(NOT _exclude_compiler AND _exclude_runtime STREQUAL ${HIP_RUNTIME}) - elseif(_exclude_runtime STREQUAL ${HIP_RUNTIME} AND _exclude_compiler STREQUAL ${HIP_COMPILER}) + if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) + elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) + elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) + elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) elseif(_exclude_test_from_build STREQUAL TRUE) else() make_named_test(${_label}/${_target} ${_label}/${_testname}.tst ${_arguments}) diff --git a/tests/src/Negative/memory/hipMemory.cpp b/tests/src/Negative/memory/hipMemory.cpp index b062d05cc1..a71ee948f5 100644 --- a/tests/src/Negative/memory/hipMemory.cpp +++ b/tests/src/Negative/memory/hipMemory.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp index 8a1dc07b62..6f0662b82d 100644 --- a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp +++ b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/dynamicLoading/complex_loading_behavior.cpp b/tests/src/dynamicLoading/complex_loading_behavior.cpp index 70066dc46d..954fa7da35 100644 --- a/tests/src/dynamicLoading/complex_loading_behavior.cpp +++ b/tests/src/dynamicLoading/complex_loading_behavior.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START * BUILD_CMD: libfoo_amd %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc + * BUILD_CMD: libfoo_nvidia %hc %S/%s -o libfoo.so -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM hcc vdi * BUILD_CMD: %t %hc %S/%s -o %T/%t -ldl * TEST: %t * HIT_END diff --git a/tests/src/gcc/LaunchKernel.c b/tests/src/gcc/LaunchKernel.c index fc59fa9d30..d2fc854510 100644 --- a/tests/src/gcc/LaunchKernel.c +++ b/tests/src/gcc/LaunchKernel.c @@ -19,10 +19,10 @@ /* HIT_START - * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: launchkernel.o %cc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc - * BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t EXCLUDE_HIP_PLATFORM nvcc + * BUILD_CMD: gpu.o %hc -I%hip-path/include -g -c %S/gpu.cpp -o %T/gpu.o EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: launchkernel.o %hc -D__HIP_PLATFORM_HCC__ -g -I%hip-path/include -c %S/LaunchKernel.c -o %T/launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi + * BUILD_CMD: LaunchKernel %hc %T/launchkernel.o %T/gpu.o -g -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/%t DEPENDS 
gpu.o launchkernel.o EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc vdi * HIT_END */ @@ -36,7 +36,7 @@ bool LaunchKernelArg() dim3 blocks = {1,1,1}; dim3 threads = {1,1,1}; - HIPCHECK(hipLaunchKernel(kernel, blocks, threads, NULL, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel, blocks, threads, NULL, 0, 0)); return true; } @@ -52,7 +52,7 @@ bool LaunchKernelArg1() HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); void* Args[]={&A_d}; - HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel1, blocks, threads, Args, 0, 0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -84,7 +84,7 @@ bool LaunchKernelArg2() HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); void* Args[]={&A_d, &B_d}; - HIPCHECK(hipLaunchKernel(kernel2, blocks, threads, Args,0,0)); + HIPCHECK(hipLaunchKernel((const void *)kernel2, blocks, threads, Args,0,0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -123,7 +123,7 @@ bool LaunchKernelArg3() HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); void* Args[]={&A_d, &B_d, &C_d}; - HIPCHECK(hipLaunchKernel(kernel3, blocks, threads, Args,0,0)); + HIPCHECK(hipLaunchKernel((const void *)kernel3, blocks, threads, Args,0,0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&C, C_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -154,7 +154,7 @@ bool LaunchKernelArg4() struct things t = {2,20,200}; void* Args[]={&A_d, &c, &s, &i, &t}; - HIPCHECK(hipLaunchKernel(kernel4, blocks, threads, Args, 0, 0)); + HIPCHECK(hipLaunchKernel((const void *)kernel4, blocks, threads, Args, 0, 0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); diff --git a/tests/src/gcc/hipMalloc.c b/tests/src/gcc/hipMalloc.c index 19e90d5222..ebf163de28 100644 --- a/tests/src/gcc/hipMalloc.c +++ 
b/tests/src/gcc/hipMalloc.c @@ -18,10 +18,10 @@ * */ /* HIT_START - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc - * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc - * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVCC__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_nv -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM hcc vdi + * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_HCC__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libhip_hcc.so -o %T/hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: hipMalloc_nv EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: hipMalloc_hcc EXCLUDE_HIP_PLATFORM nvcc vdi * HIT_END */ diff --git a/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/tests/src/hiprtc/hiprtcGetLoweredName.cpp index e7b88d26d2..a63e13af64 100644 --- a/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/hiprtc/saxpy.cpp b/tests/src/hiprtc/saxpy.cpp index 156a44afe4..a08c1c2399 100755 --- a/tests/src/hiprtc/saxpy.cpp +++ b/tests/src/hiprtc/saxpy.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ @@ -143,7 +143,7 @@ int main() hipMemcpyDtoH(hOut.get(), dOut, bufferSize); for (size_t i = 0; i < n; ++i) { - if (a * hX[i] + hY[i] != hOut[i]) { failed("Validation failed."); } + if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { failed("Validation failed."); } } hipFree(dX); diff --git a/tests/src/hostcall/hipHostcallFuncCall.cpp b/tests/src/hostcall/hipHostcallFuncCall.cpp index 4b73352bb7..15ac3e1f4e 100644 --- a/tests/src/hostcall/hipHostcallFuncCall.cpp +++ b/tests/src/hostcall/hipHostcallFuncCall.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/hostcall/hipHostcallPrintThings.cpp b/tests/src/hostcall/hipHostcallPrintThings.cpp index 742798c0b9..3d1a0c4e07 100644 --- a/tests/src/hostcall/hipHostcallPrintThings.cpp +++ b/tests/src/hostcall/hipHostcallPrintThings.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/kernel/hipExtLaunchKernelGGL.cpp b/tests/src/kernel/hipExtLaunchKernelGGL.cpp index 4164a87f1e..39c660322b 100644 --- a/tests/src/kernel/hipExtLaunchKernelGGL.cpp +++ b/tests/src/kernel/hipExtLaunchKernelGGL.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. 
void test(size_t N) { size_t Nbytes = N * sizeof(int); - +#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; @@ -51,6 +51,7 @@ void test(size_t N) { HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); +#endif } int main(int argc, char* argv[]) { diff --git a/tests/src/p2p/hipPeerToPeer_simple.cpp b/tests/src/p2p/hipPeerToPeer_simple.cpp index 205e3fcc04..90e7112356 100644 --- a/tests/src/p2p/hipPeerToPeer_simple.cpp +++ b/tests/src/p2p/hipPeerToPeer_simple.cpp @@ -24,9 +24,9 @@ THE SOFTWARE. /* HIT_START * BUILD: %t %s ../test_common.cpp - * TEST: %t EXCLUDE_HIP_PLATFORM hcc - * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc - * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc + * TEST: %t EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc vdi + * TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc vdi * HIT_END */ diff --git a/tests/src/printf/hipPrintfAltForms.cpp b/tests/src/printf/hipPrintfAltForms.cpp new file mode 100644 index 0000000000..062b688ed8 --- /dev/null +++ b/tests/src/printf/hipPrintfAltForms.cpp @@ -0,0 +1,76 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%#o\n", 042); + printf("%#x\n", 0x42); + printf("%#X\n", 0x42); + printf("%#08x\n", 0x42); + printf("%#f\n", -123.456); + printf("%#F\n", 123.456); + printf("%#e\n", 123.456); + printf("%#E\n", -123.456); + printf("%#g\n", -123.456); + printf("%#G\n", 123.456); + printf("%#a\n", 123.456); + printf("%#A\n", -123.456); + printf("%#.8x\n", 0x42); + printf("%#16.8x\n", 0x42); + printf("%-#16.8x\n", 0x42); +} + +int main(int argc, char **argv) { + std::string reference(R"here(042 +0x42 +0X42 +0x000042 +-123.456000 +123.456000 +1.234560e+02 +-1.234560E+02 +-123.456 +123.456 +0x1.edd2f1a9fbe77p+6 +-0X1.EDD2F1A9FBE77P+6 +0x00000042 + 0x00000042 +0x00000042 +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/tests/src/printf/hipPrintfBasic.cpp b/tests/src/printf/hipPrintfBasic.cpp new file mode 100644 index 0000000000..db2ee269db --- /dev/null +++ b/tests/src/printf/hipPrintfBasic.cpp @@ -0,0 +1,275 @@ +/* +Copyright (c) 2020 - present Advanced Micro 
Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" +#include + +// Global string constants don't work inside device functions, so we +// use a macro to repeat the declaration in host and device contexts. 
+DECLARE_DATA(); + +__global__ void kernel_uniform0(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Hello World\n"); +} + +static void test_uniform0(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_uniform0, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == strlen("Hello World\n")); + } + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Hello World"] == num_threads); +} + +__global__ void kernel_uniform1(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Six times Eight is %d\n", 42); +} + +static void test_uniform1(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_uniform1, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == strlen("Six times Eight is 42") + 1); + } + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Six times Eight is 42"] == num_threads); +} + +__global__ void kernel_divergent0(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + retval[tid] = printf("Thread ID: %d\n", tid); 
+} + +static void test_divergent0(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_divergent0, dim3(num_blocks), + dim3(threads_per_block), 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != 10; ++ii) { + HIPASSERT(retval[ii] == 13); + } + + for (uint ii = 10; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == 14); + } + + std::vector threadIds; + for (std::string line; std::getline(CapturedData, line);) { + auto pos = line.find(':'); + HIPASSERT(line.substr(0, pos) == "Thread ID"); + threadIds.push_back(std::stoul(line.substr(pos + 2))); + } + + std::sort(threadIds.begin(), threadIds.end()); + HIPASSERT(threadIds.size() == num_threads); + HIPASSERT(threadIds.back() == num_threads - 1); +} + +__global__ void kernel_divergent1(int *retval) { + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + if (tid % 2) { + retval[tid] = printf("Hello World\n"); + } else { + retval[tid] = -1; + } +} + +static void test_divergent1(int *retval, uint num_blocks, + uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_divergent1, dim3(num_blocks), + dim3(threads_per_block), 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + if (ii % 2) { + HIPASSERT(retval[ii] == strlen("Hello World\n")); + } else { + HIPASSERT(retval[ii] == -1); + } + } + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 1); + HIPASSERT(linecount["Hello World"] == num_threads / 2); +} + 
+__global__ void kernel_series(int *retval) { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + result += printf("%s\n", msg_long1); + result += printf("%s\n", msg_short); + result += printf("%s\n", msg_long2); + + retval[tid] = result; +} + +static void test_series(int *retval, uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + for (uint i = 0; i != num_threads; ++i) { + retval[i] = 0x23232323; + } + + hipLaunchKernelGGL(kernel_series, dim3(num_blocks), dim3(threads_per_block), + 0, 0, retval); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + for (uint ii = 0; ii != num_threads; ++ii) { + HIPASSERT(retval[ii] == + strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3); + } + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_long1] == num_threads); + HIPASSERT(linecount[msg_long2] == num_threads); + HIPASSERT(linecount[msg_short] == num_threads); +} + +__global__ void kernel_divergent_loop() { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + for (int i = 0; i <= tid; ++i) { + printf("%d\n", i); + } +} + +static void test_divergent_loop(uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + + hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + + std::map count; + while (true) { + int i; + CapturedData >> i; + if (CapturedData.fail()) + break; + count[i]++; + } + + HIPASSERT(count.size() == num_threads); + for (int i = 0; i != num_threads; ++i) { + HIPASSERT(count[i] == num_threads - i); + } +} + +int main() { + 
uint num_blocks = 1; + uint threads_per_block = 64; + uint num_threads = num_blocks * threads_per_block; + + void *retval_void; + HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads)); + auto retval = reinterpret_cast(retval_void); + + test_uniform0(retval, num_blocks, threads_per_block); + test_uniform1(retval, num_blocks, threads_per_block); + test_divergent0(retval, num_blocks, threads_per_block); + test_divergent1(retval, num_blocks, threads_per_block); + test_series(retval, num_blocks, threads_per_block); + test_divergent_loop(num_blocks, threads_per_block); + + passed(); +} diff --git a/tests/src/printf/hipPrintfFlags.cpp b/tests/src/printf/hipPrintfFlags.cpp new file mode 100644 index 0000000000..e9feb2e5e8 --- /dev/null +++ b/tests/src/printf/hipPrintfFlags.cpp @@ -0,0 +1,68 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +__global__ void test_kernel() { + printf("%08d\n", 42); + printf("%08i\n", -42); + printf("%08u\n", 42); + printf("%08g\n", 123.456); + printf("%0+8d\n", 42); + printf("%+d\n", -42); + printf("%+08d\n", 42); + printf("%-8s\n", "xyzzy"); + printf("% i\n", -42); + printf("%-16.8d\n", 42); + printf("%16.8d\n", 42); +} + +int main(int argc, char **argv) { + std::string reference(R"here(00000042 +-0000042 +00000042 +0123.456 ++0000042 +-42 ++0000042 +xyzzy +-42 +00000042 + 00000042 +)here"); + + CaptureStream captured(stdout); + hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); + hipStreamSynchronize(0); + auto CapturedData = captured.getCapturedData(); + std::string device_output = gulp(CapturedData); + + HIPASSERT(device_output == reference); + passed(); +} diff --git a/tests/src/printf/hipPrintfManyDevices.cpp b/tests/src/printf/hipPrintfManyDevices.cpp new file mode 100644 index 0000000000..44c8ed5741 --- /dev/null +++ b/tests/src/printf/hipPrintfManyDevices.cpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * HIT_END + */ + +#include "test_common.h" +#include "printf_common.h" + +DECLARE_DATA(); + +__global__ void print_things() { + DECLARE_DATA(); + + uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + const char *msg[] = {msg_short, msg_long1, msg_long2}; + + printf("%s\n", msg[tid % 3]); + if (tid % 3 == 0) + printf("%s\n", msg_short); + printf("%s\n", msg[(tid + 1) % 3]); + printf("%s\n", msg[(tid + 2) % 3]); +} + +int main() { + uint num_blocks = 14; + uint threads_per_block = 250; + uint threads_per_device = num_blocks * threads_per_block; + + int num_devices = 0; + hipGetDeviceCount(&num_devices); + + CaptureStream captured(stdout); + for (int i = 0; i != num_devices; ++i) { + hipSetDevice(i); + hipLaunchKernelGGL(print_things, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipDeviceSynchronize(); + } + auto CapturedData = captured.getCapturedData(); + + std::map linecount; + for (std::string line; std::getline(CapturedData, line);) { + linecount[line]++; + } + + uint num_threads = threads_per_device * num_devices; + HIPASSERT(linecount.size() == 3); + HIPASSERT(linecount[msg_long1] == num_threads); + HIPASSERT(linecount[msg_long2] == num_threads); + HIPASSERT(linecount[msg_short] == + num_threads + ((threads_per_device + 2) / 3) * num_devices); + + passed(); +} diff --git 
a/tests/src/printf/hipPrintfManyWaves.cpp b/tests/src/printf/hipPrintfManyWaves.cpp
new file mode 100644
index 0000000000..dae6dde307
--- /dev/null
+++ b/tests/src/printf/hipPrintfManyWaves.cpp
@@ -0,0 +1,323 @@
+/*
+Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include "printf_common.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+
+// Global string constants don't work inside device functions, so we
+// use a macro to repeat the declaration in host and device contexts.
+DECLARE_DATA();
+
+// Every work-item selects one of the three strings by tid % 3 and prints it
+// through a single printf call site. printf's return value is stored in
+// retval[tid].
+__global__ void kernel_mixed0(int *retval) {
+  DECLARE_DATA();
+
+  const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+
+  // Three strings passed as divergent values to the same hostcall.
+  const char *msg = nullptr;
+  switch (tid % 3) {
+  case 0:
+    msg = msg_short;
+    break;
+  case 1:
+    msg = msg_long1;
+    break;
+  case 2:
+    msg = msg_long2;
+    break;
+  }
+
+  retval[tid] = printf("%s\n", msg);
+}
+
+// Launches kernel_mixed0 with stdout captured, then checks each return value
+// (string length plus one for the newline) and how often each line appears.
+static void test_mixed0(int *retval, uint num_blocks, uint threads_per_block) {
+  CaptureStream captured(stdout);
+
+  uint num_threads = num_blocks * threads_per_block;
+  for (uint i = 0; i != num_threads; ++i) {
+    retval[i] = 0x23232323; // sentinel: differs from every expected value
+  }
+
+  hipLaunchKernelGGL(kernel_mixed0, dim3(num_blocks), dim3(threads_per_block),
+                     0, 0, retval);
+  HIPCHECK(hipStreamSynchronize(0));
+  auto CapturedData = captured.getCapturedData();
+
+  for (uint ii = 0; ii != num_threads; ++ii) {
+    switch (ii % 3) {
+    case 0:
+      HIPASSERT(retval[ii] == strlen(msg_short) + 1);
+      break;
+    case 1:
+      HIPASSERT(retval[ii] == strlen(msg_long1) + 1);
+      break;
+    case 2:
+      HIPASSERT(retval[ii] == strlen(msg_long2) + 1);
+      break;
+    }
+  }
+
+  std::map<std::string, int> linecount;
+  for (std::string line; std::getline(CapturedData, line);) {
+    linecount[line]++;
+  }
+
+  HIPASSERT(linecount.size() == 3);
+  HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
+  HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3);
+  HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3);
+}
+
+// Same three strings as kernel_mixed0, but each one is printed from its own
+// printf call site inside a divergent switch.
+__global__ void kernel_mixed1(int *retval) {
+  DECLARE_DATA();
+
+  const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+
+  // Three strings passed to divergent hostcalls.
+  switch (tid % 3) {
+  case 0:
+    retval[tid] = printf("%s\n", msg_short);
+    break;
+  case 1:
+    retval[tid] = printf("%s\n", msg_long1);
+    break;
+  case 2:
+    retval[tid] = printf("%s\n", msg_long2);
+    break;
+  }
+}
+
+// Launches kernel_mixed1 and applies the same checks as test_mixed0.
+static void test_mixed1(int *retval, uint num_blocks, uint threads_per_block) {
+  CaptureStream captured(stdout);
+
+  uint num_threads = num_blocks * threads_per_block;
+  for (uint i = 0; i != num_threads; ++i) {
+    retval[i] = 0x23232323; // sentinel: differs from every expected value
+  }
+
+  hipLaunchKernelGGL(kernel_mixed1, dim3(num_blocks), dim3(threads_per_block),
+                     0, 0, retval);
+  HIPCHECK(hipStreamSynchronize(0));
+  auto CapturedData = captured.getCapturedData();
+
+  for (uint ii = 0; ii != num_threads; ++ii) {
+    switch (ii % 3) {
+    case 0:
+      HIPASSERT(retval[ii] == strlen(msg_short) + 1);
+      break;
+    case 1:
+      HIPASSERT(retval[ii] == strlen(msg_long1) + 1);
+      break;
+    case 2:
+      HIPASSERT(retval[ii] == strlen(msg_long2) + 1);
+      break;
+    }
+  }
+
+  std::map<std::string, int> linecount;
+  for (std::string line; std::getline(CapturedData, line);) {
+    linecount[line]++;
+  }
+
+  HIPASSERT(linecount.size() == 3);
+  HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
+  HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3);
+  HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3);
+}
+
+// Every work-item prints all three strings on one line; tid % 3 selects the
+// rotation in which they appear.
+__global__ void kernel_mixed2(int *retval) {
+  DECLARE_DATA();
+
+  const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+
+  // Three different strings. All workitems print all three, but
+  // in different orders.
+  const char *msg[] = {msg_short, msg_long1, msg_long2};
+  retval[tid] =
+      printf("%s%s%s\n", msg[tid % 3], msg[(tid + 1) % 3], msg[(tid + 2) % 3]);
+}
+
+// Launches kernel_mixed2 and expects each of the three rotations on about a
+// third of the captured lines, all with the same return value.
+static void test_mixed2(int *retval, uint num_blocks, uint threads_per_block) {
+  CaptureStream captured(stdout);
+
+  uint num_threads = num_blocks * threads_per_block;
+  for (uint i = 0; i != num_threads; ++i) {
+    retval[i] = 0x23232323; // sentinel: differs from every expected value
+  }
+
+  hipLaunchKernelGGL(kernel_mixed2, dim3(num_blocks), dim3(threads_per_block),
+                     0, 0, retval);
+  HIPCHECK(hipStreamSynchronize(0));
+  auto CapturedData = captured.getCapturedData();
+
+  for (uint ii = 0; ii != num_threads; ++ii) {
+    HIPASSERT(retval[ii] ==
+              strlen(msg_short) + strlen(msg_long1) + strlen(msg_long2) + 1);
+  }
+
+  std::map<std::string, int> linecount;
+  for (std::string line; std::getline(CapturedData, line);) {
+    linecount[line]++;
+  }
+
+  std::string str1 =
+      std::string(msg_short) + std::string(msg_long1) + std::string(msg_long2);
+  std::string str2 =
+      std::string(msg_long1) + std::string(msg_long2) + std::string(msg_short);
+  std::string str3 =
+      std::string(msg_long2) + std::string(msg_short) + std::string(msg_long1);
+
+  HIPASSERT(linecount.size() == 3);
+  HIPASSERT(linecount[str1] == (num_threads + 2) / 3);
+  HIPASSERT(linecount[str2] == (num_threads + 1) / 3);
+  HIPASSERT(linecount[str3] == (num_threads + 0) / 3);
+}
+
+// Every work-item prints msg_long1 and msg_long2; work-items with tid % 3 == 0
+// also print msg_short in between. retval[tid] is the sum of the return values.
+__global__ void kernel_mixed3(int *retval) {
+  DECLARE_DATA();
+
+  const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+  int result = 0;
+
+  result += printf("%s\n", msg_long1);
+  if (tid % 3 == 0) {
+    result += printf("%s\n", msg_short);
+  }
+  result += printf("%s\n", msg_long2);
+
+  retval[tid] = result;
+}
+
+// Launches kernel_mixed3 and checks the summed return values and line counts.
+static void test_mixed3(int *retval, uint num_blocks, uint threads_per_block) {
+  CaptureStream captured(stdout);
+
+  uint num_threads = num_blocks * threads_per_block;
+  for (uint i = 0; i != num_threads; ++i) {
+    retval[i] = 0x23232323; // sentinel: differs from every expected value
+  }
+
+  hipLaunchKernelGGL(kernel_mixed3, dim3(num_blocks), dim3(threads_per_block),
+                     0, 0, retval);
+  HIPCHECK(hipStreamSynchronize(0));
+  auto CapturedData = captured.getCapturedData();
+
+  for (uint ii = 0; ii != num_threads; ++ii) {
+    if (ii % 3 == 0) {
+      HIPASSERT(retval[ii] ==
+                strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3);
+    } else {
+      HIPASSERT(retval[ii] == strlen(msg_long1) + strlen(msg_long2) + 2);
+    }
+  }
+
+  std::map<std::string, int> linecount;
+  for (std::string line; std::getline(CapturedData, line);) {
+    linecount[line]++;
+  }
+
+  HIPASSERT(linecount.size() == 3);
+  HIPASSERT(linecount[msg_long1] == num_threads);
+  HIPASSERT(linecount[msg_long2] == num_threads);
+  HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3);
+}
+
+// Each work-item prints the 21 consecutive integers starting at tid * 21,
+// three per line over seven printf calls.
+__global__ void kernel_numbers() {
+  uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+  for (uint i = 0; i != 7; ++i) {
+    uint base = tid * 21 + i * 3;
+    printf("%d %d %d\n", base, base + 1, base + 2);
+  }
+}
+
+// Launches kernel_numbers and checks that the captured output contains every
+// integer in [0, 21 * num_threads). Leaves reporting success to the caller.
+static void test_numbers(uint num_blocks, uint threads_per_block) {
+  CaptureStream captured(stdout);
+  uint num_threads = num_blocks * threads_per_block;
+
+  hipLaunchKernelGGL(kernel_numbers, dim3(num_blocks), dim3(threads_per_block),
+                     0, 0);
+  HIPCHECK(hipStreamSynchronize(0));
+  auto CapturedData = captured.getCapturedData();
+
+  std::vector<uint> points;
+  while (true) {
+    uint i;
+    CapturedData >> i;
+    if (CapturedData.fail())
+      break;
+    points.push_back(i);
+  }
+
+  std::sort(points.begin(), points.end());
+  points.erase(std::unique(points.begin(), points.end()), points.end());
+  HIPASSERT(points.size() == 21 * num_threads);
+  HIPASSERT(points.back() == 21 * num_threads - 1);
+}
+
+// Runs every test on a 150 x 250 launch, sharing one hipHostMalloc'ed buffer.
+int main(int argc, char **argv) {
+  uint num_blocks = 150;
+  uint threads_per_block = 250;
+  uint num_threads = num_blocks * threads_per_block;
+
+  void *retval_void;
+  HIPCHECK(hipHostMalloc(&retval_void, sizeof(int) * num_threads));
+  auto retval = reinterpret_cast<int *>(retval_void);
+
+  test_mixed0(retval, num_blocks, threads_per_block);
+  test_mixed1(retval, num_blocks, threads_per_block);
+  test_mixed2(retval, num_blocks, threads_per_block);
+  test_mixed3(retval, num_blocks, threads_per_block);
+  test_numbers(num_blocks, threads_per_block);
+  HIPCHECK(hipHostFree(retval));
+
+  passed();
+}
diff --git a/tests/src/printf/hipPrintfSpecifiers.cpp b/tests/src/printf/hipPrintfSpecifiers.cpp
new file mode 100644
index 0000000000..009c76a968
--- /dev/null
+++ b/tests/src/printf/hipPrintfSpecifiers.cpp
@@ -0,0 +1,90 @@
+/*
+Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include "printf_common.h"
+
+__global__ void test_kernel() { // one printf per conversion under test
+  const char *N = nullptr;
+  const char *s = "hello world";
+
+  printf("xyzzy\n");
+  printf("%%\n");
+  printf("hello %% world\n");
+  printf("%%s\n");
+  // Two special tests to make sure that the compiler pass correctly
+  // skips over a '%%' without affecting the logic for locating
+  // string arguments.
+  printf("%%s%p\n", (void *)0xf01dab1eca55e77e);
+  printf("%%c%s\n", "xyzzy");
+  printf("%c%c%c\n", 's', 'e', 'p');
+  printf("%d\n", -42);
+  printf("%u\n", 42);
+  printf("%f\n", 123.456);
+  printf("%F\n", -123.456);
+  printf("%e\n", -123.456);
+  printf("%E\n", 123.456);
+  printf("%g\n", 123.456);
+  printf("%G\n", -123.456);
+  printf("%c\n", 'x');
+  printf("%s\n", N); // the reference below expects an empty line here
+  printf("%p\n", N);
+  printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 5, s, (void *)0xf01dab1eca55e77e);
+}
+
+int main(int argc, char **argv) { // device output must equal the reference
+  std::string reference(R"here(xyzzy
+%
+hello % world
+%s
+%s0xf01dab1eca55e77e
+%cxyzzy
+sep
+-42
+42
+123.456000
+-123.456000
+-1.234560e+02
+1.234560E+02
+123.456
+-123.456
+x
+
+(nil)
+3.14159000    hello 0xf01dab1eca55e77e
+)here");
+
+  CaptureStream captured(stdout);
+  hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
+  HIPCHECK(hipStreamSynchronize(0)); // surface launch/runtime errors directly
+  auto CapturedData = captured.getCapturedData();
+  std::string device_output = gulp(CapturedData);
+
+  HIPASSERT(device_output == reference);
+  passed();
+}
diff --git a/tests/src/printf/hipPrintfStar.cpp b/tests/src/printf/hipPrintfStar.cpp
new file mode 100644
index 0000000000..e4d48e692f
--- /dev/null
+++ b/tests/src/printf/hipPrintfStar.cpp
@@ -0,0 +1,54 @@
+/*
+Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+/* HIT_START
+ * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include "printf_common.h"
+
+__global__ void test_kernel() { // width/precision supplied through '*'
+  printf("%*d\n", 16, 42);
+  printf("%.*d\n", 8, 42);
+  printf("%*.*d\n", -16, 8, 42); // negative width: left-justified, 8 trailing spaces
+  printf("%*.*f %s * %.*s\n", 16, 8, 123.456, "hello", 5, "worldxyz");
+}
+
+int main(int argc, char **argv) { // device output must equal the reference
+  std::string reference(R"here(              42
+00000042
+00000042        
+    123.45600000 hello * world
+)here");
+
+  CaptureStream captured(stdout);
+  hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
+  HIPCHECK(hipStreamSynchronize(0)); // surface launch/runtime errors directly
+  auto CapturedData = captured.getCapturedData();
+  std::string device_output = gulp(CapturedData);
+
+  HIPASSERT(device_output == reference);
+  passed();
+}
diff --git a/tests/src/printf/hipPrintfWidthPrecision.cpp b/tests/src/printf/hipPrintfWidthPrecision.cpp
new file mode 100644
index 0000000000..5bf6b65724
--- /dev/null
+++ b/tests/src/printf/hipPrintfWidthPrecision.cpp
@@ -0,0 +1,74 @@
+/*
+Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include "printf_common.h"
+
+__global__ void test_kernel() { // fixed width/precision in the format string
+  printf("%16d\n", 42);
+  printf("%.8d\n", 42);
+  printf("%16.5d\n", -42);
+  printf("%.8x\n", 0x42);
+  printf("%.8o\n", 042);
+  printf("%16.8e\n", 12345.67891);
+  printf("%16.8f\n", -12345.67891);
+  printf("%16.8g\n", 12345.67891);
+  printf("%8.4e\n", -12345.67891);
+  printf("%8.4f\n", 12345.67891);
+  printf("%8.4g\n", 12345.67891);
+  printf("%4.2f\n", 12345.67891);
+  printf("%.1f\n", 12345.67891);
+  printf("%.5s\n", "helloxyz");
+}
+
+int main(int argc, char **argv) { // device output must equal the reference
+  std::string reference(R"here(              42
+00000042
+          -00042
+00000042
+00000042
+  1.23456789e+04
+ -12345.67891000
+       12345.679
+-1.2346e+04
+12345.6789
+1.235e+04
+12345.68
+12345.7
+hello
+)here");
+
+  CaptureStream captured(stdout);
+  hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0);
+  HIPCHECK(hipStreamSynchronize(0)); // surface launch/runtime errors directly
+  auto CapturedData = captured.getCapturedData();
+  std::string device_output = gulp(CapturedData);
+
+  HIPASSERT(device_output == reference);
+  passed();
+}
diff --git a/tests/src/printf/printf_common.h b/tests/src/printf/printf_common.h
new file mode 100644
index 0000000000..a2df88db9f
--- /dev/null
+++ b/tests/src/printf/printf_common.h
@@ -0,0 +1,101 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <unistd.h>
+
+// Redirects a stdio stream (e.g. stdout) into a temporary file so that a test
+// can inspect what was printed while the object was capturing.
+//
+// Every POSIX call is checked through its return value: errno is only
+// meaningful after a call has reported failure, so it is never tested alone.
+struct CaptureStream {
+  int saved_fd; // duplicate of the original descriptor; -1 once restored
+  int orig_fd;  // descriptor of the stream being captured
+  int temp_fd;  // descriptor returned by mkstemp(); closed after dup2()
+
+  char tempname[13] = "mytestXXXXXX"; // mkstemp() template, rewritten in place
+
+  // Reports errno and asserts when a system call has failed.
+  static void check(bool ok) {
+    if (!ok) {
+      error(0, errno, "Error");
+      assert(false);
+    }
+  }
+
+  // Starts capturing: writes to `original` land in the temporary file.
+  CaptureStream(FILE *original) {
+    orig_fd = fileno(original);
+    saved_fd = dup(orig_fd);
+    check(saved_fd != -1);
+
+    temp_fd = mkstemp(tempname);
+    check(temp_fd != -1);
+
+    fflush(nullptr);
+    check(dup2(temp_fd, orig_fd) != -1);
+    check(close(temp_fd) == 0);
+  }
+
+  // The object owns a descriptor and a file on disk; a copy would restore the
+  // stream and remove the file a second time.
+  CaptureStream(const CaptureStream &) = delete;
+  CaptureStream &operator=(const CaptureStream &) = delete;
+
+  // Points the stream back at its original target; later calls are no-ops.
+  void restoreStream() {
+    if (saved_fd == -1)
+      return;
+    fflush(nullptr);
+    check(dup2(saved_fd, orig_fd) != -1);
+    check(close(saved_fd) == 0);
+    saved_fd = -1;
+  }
+
+  // Stops capturing and returns the temporary file opened for reading.
+  std::ifstream getCapturedData() {
+    restoreStream();
+    std::ifstream temp(tempname);
+    return temp;
+  }
+
+  // Restores the stream if still captured and deletes the temporary file.
+  ~CaptureStream() {
+    restoreStream();
+    check(remove(tempname) == 0);
+  }
+};
+
+// Reads all of `input` into a string and closes the stream. Returns an empty
+// string when tellg() reports failure (e.g. the file could not be opened).
+static std::string gulp(std::ifstream &input) {
+  std::string retval;
+  input.seekg(0, std::ios_base::end);
+  const std::streamoff size = input.tellg();
+  if (size > 0) {
+    retval.resize(size);
+    input.seekg(0, std::ios_base::beg);
+    input.read(&retval[0], retval.size());
+  }
+  input.close();
+  return retval;
+}
+
+// Declares the three message strings used by the printf tests. A macro, so
+// the same declarations can be repeated in host and in device scope.
+#define DECLARE_DATA()                                                         \
+  const char *msg_short = "Carpe diem.";                                       \
+  const char *msg_long1 = "Lorem ipsum dolor sit amet, consectetur nullam. "   \
+                          "In mollis imperdiet nibh nec ullamcorper.";         \
+  const char *msg_long2 = "Curabitur nec metus sit amet augue vehicula "       \
+                          "ultrices ut id leo. Lorem ipsum dolor sit amet, "   \
+                          "consectetur adipiscing elit amet.";
+
+#endif
diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp
index b62e0a16aa..dd6c23e334 100644
--- a/tests/src/runtimeApi/event/hipEventIpc.cpp
+++ b/tests/src/runtimeApi/event/hipEventIpc.cpp
@@ -24,7 +24,7 @@ THE SOFTWARE.
// forces synchronization : set /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t --iterations 10 * HIT_END */ diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp index 692d14cec7..febc664f7d 100644 --- a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -18,7 +18,7 @@ * */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp index d08ff2a7e8..23ac329d64 100644 --- a/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp @@ -60,16 +60,16 @@ int main() { HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); - HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s)); HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, N); HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); HIPCHECK(hipStreamSynchronize(s)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipStreamDestroy(s)); diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index 3db855c400..adface243d 100644 --- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -26,17 +26,19 @@ THE SOFTWARE. 
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 * TEST: %t EXCLUDE_HIP_PLATFORM all * HIT_END + */ #include "hip/hip_runtime.h" #include "test_common.h" -#ifdef __HIP_PLATFORM_HCC__ -#include -#endif - #define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */ +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_VDI__) +#include +#else +#define USE_HCC_MEMTRACKER 0 +#endif int elementSizes[] = {1, 16, 1024, 524288, 16 * 1000 * 1000}; int nSizes = sizeof(elementSizes) / sizeof(int); @@ -201,7 +203,8 @@ int main(int argc, char* argv[]) { }; for (int index = 0; index < nSizes; index++) { - testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/); + //ToDo: Enable when verified on all platforms + //testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/); testMultiGpu(dev0, dev1, elementSizes[index], true /*Host Synchronization*/); } diff --git a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp b/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp index f4c72ca1c5..a26c9be4a0 100755 --- a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp +++ b/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. /* HIT_START * BUILD_CMD: matmul.code %hc --genco %S/matmul.cpp -o matmul.code EXCLUDE_HIP_PLATFORM nvcc - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp index c565426f2d..102387cbe7 100644 --- a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp @@ -20,7 +20,7 @@ THE SOFTWARE. // Simple test for hipLaunchCooperativeKernelMultiDevice API. 
/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ @@ -178,8 +178,6 @@ int main() { hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0); - HIPCHECK(hipMemcpy(init, dC, sizeof(long), hipMemcpyDeviceToHost)); - if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) { std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; std::cout << "Test failed! \n"; diff --git a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp index 896738892d..e0fcd4108b 100644 --- a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Simple test for hipLaunchCooperativeKernel API. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index 6bbbbbef34..11bd6e7d50 100644 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp index 9ae5883608..e7c254e9fd 100644 --- a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp +++ b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ @@ -33,6 +33,9 @@ THE SOFTWARE. #define fileName "tex2d_kernel.code" +#if __HIP__ +__hip_pinned_shadow__ +#endif texture tex; bool testResult = false; diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp index b12dd1815d..e744d88776 100644 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -21,11 +21,15 @@ THE SOFTWARE. */ /* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code + * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi * HIT_END */ #include "hip/hip_runtime.h" + +#if __HIP__ +__hip_pinned_shadow__ +#endif extern texture tex; extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { diff --git a/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp index 33ca8263e1..d8385669ea 100644 --- a/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. 
/* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp index 5f267bba28..c22b390ecc 100644 --- a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp +++ b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -11,7 +11,7 @@ #include "test_common.h" /* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/surface/hipSurfaceObj2D.cpp b/tests/src/surface/hipSurfaceObj2D.cpp index 4580220d1d..2724604279 100644 --- a/tests/src/surface/hipSurfaceObj2D.cpp +++ b/tests/src/surface/hipSurfaceObj2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/test_common.h b/tests/src/test_common.h index 7d8c39e74c..8897dc938e 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -55,11 +55,15 @@ THE SOFTWARE. printf("%sPASSED!%s\n", KGRN, KNRM); \ exit(0); +// The real "assert" would have written to stderr. But it is +// sufficient to just fflush here without getting pedantic. This also +// ensures that we don't lose any earlier writes to stdout. #define failed(...) \ printf("%serror: ", KRED); \ printf(__VA_ARGS__); \ printf("\n"); \ printf("error: TEST FAILED\n%s", KNRM); \ + fflush(NULL); \ abort(); #define warn(...) \ diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp index b01402c91d..8c57520c00 100644 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ b/tests/src/texture/hipBindTex2DPitch.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. 
*/ /*HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index 52a0d99ac1..2e962fb05d 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp index 609f6916f8..b4aa3e9c05 100644 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi * TEST: %t * HIT_END */ @@ -30,78 +30,92 @@ THE SOFTWARE. 
#define SIZE 10 static float getNormalizedValue(const float value, - const enum hipArray_Format texFormat) { - switch (texFormat) { - case HIP_AD_FORMAT_SIGNED_INT8: - return (value / SCHAR_MAX); - case HIP_AD_FORMAT_UNSIGNED_INT8: - return (value / UCHAR_MAX); - case HIP_AD_FORMAT_SIGNED_INT16: - return (value / SHRT_MAX); - case HIP_AD_FORMAT_UNSIGNED_INT16: - return (value / USHRT_MAX); - default: - return value; - } + const hipChannelFormatDesc& desc) { + if ((desc.x == 8) && (desc.f == hipChannelFormatKindSigned)) + return (value / SCHAR_MAX); + if ((desc.x == 8) && (desc.f == hipChannelFormatKindUnsigned)) + return (value / UCHAR_MAX); + if ((desc.x == 16) && (desc.f == hipChannelFormatKindSigned)) + return (value / SHRT_MAX); + if ((desc.x == 16) && (desc.f == hipChannelFormatKindUnsigned)) + return (value / USHRT_MAX); + return value; } #if __HIP__ __hip_pinned_shadow__ #endif -texture textureNormalizedVal_1D; +texture texc; +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texuc; + +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texs; + +#if __HIP__ +__hip_pinned_shadow__ +#endif +texture texus; + + +template __global__ void normalizedValTextureTest(unsigned int numElements, float* pDst) { unsigned int elementID = hipThreadIdx_x; if(elementID >= numElements) - return; - float coord =(float) elementID/(numElements-1); - pDst[elementID] = tex1D(textureNormalizedVal_1D, coord); + return; + float coord =(float) elementID/numElements; + if(std::is_same::value) + pDst[elementID] = tex1D(texc, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texuc, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texs, coord); + else if(std::is_same::value) + pDst[elementID] = tex1D(texus, coord); } template -bool textureTest(enum hipArray_Format texFormat) +bool textureTest(texture *tex) { - T hData[] = {65, 66, 67, 68, 69, 70, 71, 72,73,74}; - T *dData = NULL; - HIPCHECK(hipMalloc((void **) &dData, sizeof(T)*SIZE)); - 
HIPCHECK(hipMemcpyHtoD((hipDeviceptr_t)dData, hData, sizeof(T)*SIZE)); - textureReference* texRef = &textureNormalizedVal_1D; - HIPCHECK(hipTexRefSetAddressMode(texRef, 0, hipAddressModeClamp)); - HIPCHECK(hipTexRefSetAddressMode(texRef, 1, hipAddressModeClamp)); - HIPCHECK(hipTexRefSetFilterMode(texRef, hipFilterModePoint)); - HIPCHECK(hipTexRefSetFlags(texRef, HIP_TRSF_NORMALIZED_COORDINATES)); - HIPCHECK(hipTexRefSetFormat(texRef, texFormat, 1)); - - HIP_ARRAY_DESCRIPTOR desc; - desc.Width = SIZE; - desc.Height = 1; - desc.Format = texFormat; - desc.NumChannels = 1; - HIPCHECK(hipTexRefSetAddress2D(texRef, &desc, (hipDeviceptr_t)dData, sizeof(T)*SIZE)); - - bool testResult = true; + hipChannelFormatDesc desc = hipCreateChannelDesc(); + hipArray_t dData; + HIPCHECK(hipMallocArray(&dData, &desc, SIZE, 1, hipArrayDefault)); + + T hData[] = {65, 66, 67, 68, 69, 70, 71, 72, 73, 74}; + HIPCHECK(hipMemcpy2DToArray(dData, 0, 0, hData, sizeof(T)*SIZE, sizeof(T)*SIZE, 1, hipMemcpyHostToDevice)); + + tex->normalized = true; + tex->channelDesc = desc; + HIPCHECK(hipBindTextureToArray(tex, dData, &desc)); + float *dOutputData = NULL; HIPCHECK(hipMalloc((void **) &dOutputData, sizeof(float)*SIZE)); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(normalizedValTextureTest), dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData); + + hipLaunchKernelGGL(normalizedValTextureTest, dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData); float *hOutputData = new float[SIZE]; - HIPCHECK(hipMemcpyDtoH(hOutputData, (hipDeviceptr_t)dOutputData, (sizeof(float)*SIZE))); - + HIPCHECK(hipMemcpy(hOutputData, dOutputData, (sizeof(float)*SIZE), hipMemcpyDeviceToHost)); + + bool testResult = true; for(int i = 0; i < SIZE; i++) { - float expected = getNormalizedValue(float(hData[i]), texFormat); + float expected = getNormalizedValue(float(hData[i]), desc); if(expected != hOutputData[i]) { - printf("mismatch at index:%d for texType:%d output:%f\n",i,texFormat,hOutputData[i]); + printf("mismatch at index:%d 
output:%f expected:%f\n",i,hOutputData[i],expected); testResult = false; - break; + break; } } - hipFree(dData); - hipFree(dOutputData); - hipUnbindTexture(textureNormalizedVal_1D); + + HIPCHECK(hipFreeArray(dData)); + HIPCHECK(hipFree(dOutputData)); delete [] hOutputData; return testResult; } @@ -118,12 +132,11 @@ int main(int argc, char** argv) std::cout << "Arch - AMD GPU :: " << props.gcnArch << std::endl; #endif - status &= textureTest (HIP_AD_FORMAT_SIGNED_INT8); - status &= textureTest (HIP_AD_FORMAT_UNSIGNED_INT8); - status &= textureTest (HIP_AD_FORMAT_SIGNED_INT16); - status &= textureTest(HIP_AD_FORMAT_UNSIGNED_INT16); - status &= textureTest (HIP_AD_FORMAT_FLOAT); - + status &= textureTest (&texc); + status &= textureTest (&texuc); + status &= textureTest (&texs); + status &= textureTest(&texus); + if(status){ passed(); } diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp index 9b7a36c6be..381d07280c 100644 --- a/tests/src/texture/hipTex1DFetchCheckModes.cpp +++ b/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -17,8 +17,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*HIT_START - * BUILD: %t %s ../test_common.cpp +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index b476ae8062..5573cf6884 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp index e5014dae6b..f4d3aac1e5 100644 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ b/tests/src/texture/simpleTexture2DLayered.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp index 96b69811de..a494a1a6c0 100644 --- a/tests/src/texture/simpleTexture3D.cpp +++ b/tests/src/texture/simpleTexture3D.cpp @@ -21,12 +21,15 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ #include "test_common.h" +//typedef char T; +const char *sampleName = "simpleTexture3D"; + // Texture reference for 3D texture #if __HIP__ __hip_pinned_shadow__ @@ -44,26 +47,29 @@ __hip_pinned_shadow__ texture texc; template -__global__ void simpleKernel3DArray(T* outputData, +__global__ void simpleKernel3DArray(T* outputData, int width, int height,int depth) { for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texf, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texi, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texc, k, j, i); - } - } + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texf, k, j, i); + else if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texi, k, j, i); + else if(std::is_same::value) + outputData[i*width*height + j*width + k] = tex3D(texc, k, j, i); + } + } } } +//////////////////////////////////////////////////////////////////////////////// +//! 
Run a simple test for tex3D +//////////////////////////////////////////////////////////////////////////////// template -void runTest(int width,int height,int depth,texture *tex, hipChannelFormatKind formatKind) +void runTest(int width,int height,int depth,texture *tex) { unsigned int size = width * height * depth * sizeof(T); T* hData = (T*) malloc(size); @@ -78,7 +84,7 @@ void runTest(int width,int height,int depth,texture(); hipArray *arr; HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); @@ -88,11 +94,7 @@ void runTest(int width,int height,int depth,texture(i,i,i,&texf, hipChannelFormatKindFloat); - runTest(i+1,i,i,&texi, hipChannelFormatKindSigned); - runTest(i,i+1,i,&texc, hipChannelFormatKindSigned); + runTest(i,i,i,&texf); + runTest(i+1,i,i,&texi); + runTest(i,i+1,i,&texc); } passed(); } + diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt new file mode 100644 index 0000000000..8c1ca1f2de --- /dev/null +++ b/vdi/CMakeLists.txt @@ -0,0 +1,179 @@ +#project("hip") +cmake_minimum_required(VERSION 3.5.1) + +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") + +if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") +endif() + +set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib) +set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) + +add_definitions(-D__HIP_VDI__ -D__HIP_PLATFORM_HCC__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS 
-DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API) + +if(CMAKE_BUILD_TYPE MATCHES "^Debug$") + add_definitions(-DDEBUG) +endif() + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options("-Wno-ignored-attributes") +endif() + +set(USE_PROF_API "1") + +if(NOT DEFINED LIBVDI_STATIC_DIR) + find_path(LIBVDI_STATIC_DIR + NAMES libamdvdi_static.a + PATHS /opt/rocm/vdi + PATH_SUFFIXES lib + ) +endif() + +if(NOT DEFINED VDI_DIR) + find_path(VDI_DIR + NAMES top.hpp + PATH_SUFFIXES include + PATHS /opt/rocm/vdi + ) +endif() +message("Found Static vdi lib:${LIBVDI_STATIC_DIR} and vdi includes: ${VDI_DIR}") +set(PROF_API_HEADER_PATH ${VDI_DIR}/platform) +############################# +# Profiling API support +############################# +# Generate profiling API macros/structures header +set(PROF_API_STR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_prof_str.h") +set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/../include/hip/hcc_detail/hip_runtime_api.h") +set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") +set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") +set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") +set(PROF_API_CMD "${PROF_API_GEN} -v -t --priv ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} >${PROF_API_LOG}") +MESSAGE(STATUS "Generating profiling promitives: ${PROF_API_STR}") +execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") +#MESSAGE(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}") +set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR}) + +# Enable profiling API +if(USE_PROF_API EQUAL 1) + find_path(PROF_API_HEADER_DIR prof_protocol.h + HINTS + ${PROF_API_HEADER_PATH} + PATHS + /opt/rocm/roctracer + PATH_SUFFIXES + include/ext + ) + if(NOT PROF_API_HEADER_DIR) + MESSAGE(WARNING "Profiling API header not 
found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=") + else() + add_definitions(-DUSE_PROF_API=1) + include_directories(${PROF_API_HEADER_DIR}) + MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}") + endif() +endif() + + +if(NOT DEFINED VDI_DIR OR NOT DEFINED LIBOCL_STATIC_DIR OR NOT DEFINED LIBVDI_STATIC_DIR ) + # message(FATAL_ERROR "define VDI_DIR, LIBOCL_STATIC_DIR\n") + +endif() +list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) +set(CMAKE_MODULE_PATH${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") + +include_directories(${ROCR_INCLUDES}) +if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "") + message(STATUS "LLVM includes found ${LLVM_INCLUDES}") + include_directories(${LLVM_INCLUDES}) +endif() # if (DEFINED LLVM_INCLUDES AND NOT ${LLVM_INCLUDES} STREQUAL "") + +include_directories(${CMAKE_SOURCE_DIR}) +include_directories(${CMAKE_SOURCE_DIR}/include) +include_directories(${CMAKE_SOURCE_DIR}/elfio) +include_directories(${CMAKE_SOURCE_DIR}/amdocl) +include_directories(${CMAKE_SOURCE_DIR}/include/hip/hcc_detail/elfio) +include_directories(${VDI_DIR}) +include_directories(${VDI_DIR}/include) +include_directories(${VDI_DIR}/compiler/lib) +include_directories(${VDI_DIR}/compiler/lib/include) +include_directories(${VDI_DIR}/elf/utils/common) +include_directories(${VDI_DIR}/elf/utils/libelf) +add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL) + find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr + ) + MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") + +include_directories("$") + +add_definitions(-DBSD_LIBELF) + +add_library(hip64 OBJECT + hip_context.cpp + hip_device.cpp + hip_device_runtime.cpp + hip_error.cpp + hip_event.cpp + hip_memory.cpp + hip_module.cpp + hip_peer.cpp + hip_platform.cpp + hip_profile.cpp + hip_stream.cpp + hip_surface.cpp + hip_texture.cpp + 
hip_activity.cpp + hip_intercept.cpp + hip_rtc.cpp + cl_gl.cpp + cl_lqdflash_amd.cpp + fixme.cpp + ) +set_target_properties(hip64 PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties( + hip64 PROPERTIES + CXX_STANDARD 14 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF +) + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) +include(${LIBVDI_STATIC_DIR}/amdvdi_staticTargets.cmake) + +add_library(amdhip64 SHARED + $ + ) + +add_library(amdhip64_static STATIC + $ + ) + +add_library(host INTERFACE) +target_link_libraries(host INTERFACE amdhip64) +add_library(device INTERFACE) +target_link_libraries(device INTERFACE host) + +target_link_libraries(amdhip64_static PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) + + +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) +INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) +INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhip_hcc.so )" DESTINATION lib COMPONENT MAIN) + +INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink libamdhip64.so lib/libhiprtc.so )" DESTINATION lib COMPONENT MAIN) +INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MAIN) + +INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) + +INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) + diff --git a/vdi/cl_gl.cpp b/vdi/cl_gl.cpp new file mode 100644 index 0000000000..b0403eb488 --- /dev/null +++ b/vdi/cl_gl.cpp @@ -0,0 +1,2432 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "top.hpp" + +#ifdef _WIN32 +#include +#include +#include +// This is necessary since there are common GL/D3D10 functions +#include "cl_d3d9_amd.hpp" +#include "cl_d3d10_amd.hpp" +#include "cl_d3d11_amd.hpp" +#endif //_WIN32 + +#include +#include + +#include +#include +#include + +#include "cl_common.hpp" +#include "cl_gl_amd.hpp" + +#include "device/device.hpp" + +/* The pixel internal format for DOPP texture defined in gl_enum.h */ +#define GL_BGR8_ATI 0x8083 +#define GL_BGRA8_ATI 0x8088 + +#include +#include + + +/*! \addtogroup API + * @{ + * + * \addtogroup CL_GL_Interops + * + * This section discusses OpenCL functions that allow applications to + * use OpenGL buffer/texture/render-buffer objects as OpenCL memory + * objects. This allows efficient sharing of data between these OpenCL + * and OpenGL. 
The OpenCL API can be used to execute kernels that read + * and/or write memory objects that are also an OpenGL buffer object + * or a texture. An OpenCL image object can be created from an OpenGL + * texture or renderbuffer object. An OpenCL buffer object can be + * created from an OpenGL buffer object. An OpenCL memory object can + * be created from an OpenGL texture/buffer/render-buffer object or + * the default system provided framebuffer if and only if the OpenCL + * clContext has been created from a GL clContext. OpenGL contexts are + * created using platform specific APIs (EGL, CGL, WGL, GLX are some + * of the platform specific APIs that allow applications to create GL + * contexts). The appropriate platform API (such as EGL, CGL, WGL, + * GLX) will be extended to allow a CL clContext to be created from a + * GL clContext. Creating an OpenCL memory object from the default + * system provided framebuffer will also require an appropriate + * extension to the platform API. Refer to the appropriate platform + * API documentation to understand how to create a CL clContext from a + * GL clContext and creating a CL memory object from the default + * system provided framebuffer. + * + * @{ + * + * \addtogroup clCreateFromGLBuffer + * + * @{ + */ + +/*! \brief Creates an OpenCL buffer object from an OpenGL buffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. Only + * CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE can be used. + * + * \param glBufferName is a GL buffer object name. The GL buffer + * object must have a data store created though it does not need to + * be initialized. The size of the data store will be used to + * determine the size of the CL buffer object. + * + * \param pCpuMem is a pointer to the buffer data that may already be + * allocated by the application.
The size of the buffer that pCpuMem points + * to must be >= \a size bytes. Passing in a pointer to an already allocated + * buffer on the host and using it as a buffer object allows applications to + * share data efficiently with kernels and the host. + * + * \param errcode_ret will return an appropriate error code. If errcode_ret + * is NULL, no error code is returned. + * + * \return valid non-zero OpenCL buffer object and errcode_ret is set + * to CL_SUCCESS if the buffer object is created successfully. It + * returns a NULL value with one of the following error values + * returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if glBufferName is not a GL buffer object or is a + * GL buffer object but does not have a data store created. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, + (cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLBufferAMD(*as_amd(context), flags, bufobj, errcode_ret)); +} +RUNTIME_EXIT + +/*! 
\brief creates the following: + * - an OpenCL 2D image object from an OpenGL 2D texture object + * or a single face of an OpenGL cubemap texture object, + * - an OpenCL 2D image array object from an OpenGL 2D texture array object, + * - an OpenCL 1D image object from an OpenGL 1D texture object, + * - an OpenCL 1D image buffer object from an OpenGL texture buffer object, + * - an OpenCL 1D image array object from an OpenGL 1D texture array object, + * - an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param texture_target must be GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, + * GL_TEXTURE_BUFFER, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, + * GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. If \a texture_target + * is GL_TEXTURE_BUFFER, \a miplevel must be 0. + * + * \param texture is a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, + * rectangle or buffer texture object. + * The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The texture format and dimensions + * defined by OpenGL for the specified miplevel of the texture will be + * used to create the OpenCL image memory object. Only GL texture formats + * that map to appropriate image channel order and data type can be used + * to create the OpenCL image memory object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned.
+ * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.2r07 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, + (cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, texture_target, miplevel, texture, + 
errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture2D + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL 2D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 2D texture, cubemap or texture rectangle + * object name. The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The \a texture format and + * dimensions specified using appropriate glTexImage2D call for \a + * miplevel will be used to create the 2D image object. Only GL + * texture formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. 
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture3D + * @{ + */ + +/*! \brief Create an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_3D. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 3D texture object [name]. 
+ * The texture object must be a complete texture as per OpenGL rules on texture + * completeness. The \a texture format and dimensions specified using appropriate + * glTexImage3D call for \a miplevel will be used to create the 3D image object. + * Only GL texture formats that map to appropriate image channel order and + * data type can be used to create the 3D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not a GL 3D texture. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime.
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLRenderbuffer + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL renderbuffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param renderbuffer is a GL renderbuffer object name. The renderbuffer + * storage must be specified before the image object can be created. Only + * GL renderbuffer formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. 
+ * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set + * to CL_SUCCESS if the image object is created successfully. It returns a + * NULL value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if \a renderbuffer is not an GL renderbuffer object. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer format + * does not map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, (cl_context context, cl_mem_flags flags, + GLuint renderbuffer, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLRenderbufferAMD(*as_amd(context), flags, renderbuffer, errcode_ret)); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLObjectInfo + * @{ + */ + +/*! \brief Query GL object type from a CL memory object. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param gl_object_type returns the type of GL object attached to memobj + * and can be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, + * CL_GL_OBJECT_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE3D, or + * CL_GL_OBJECT_RENDERBUFFER. If \a gl_object_type is NULL, it is ignored. 
+ * + * \param gl_object_name returns the GL object name used to create memobj. + * If \a gl_object_name is NULL, it is ignored. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the call was executed successfully. + * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object. + * - CL_INVALID_GL_OBJECT if there is no GL object associated with \a memobj. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, + (cl_mem memobj, cl_gl_object_type* gl_object_type, GLuint* gl_object_name)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + amd::GLObject* glObject = interop->asGLObject(); + if (NULL == glObject) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + cl_int result; + + cl_gl_object_type clGLType = glObject->getCLGLObjectType(); + result = amd::clGetInfo(clGLType, sizeof(cl_gl_object_type), gl_object_type, NULL); + + GLuint glName = glObject->getGLName(); + result |= amd::clGetInfo(glName, sizeof(GLuint), gl_object_name, NULL); + + return result; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLTextureInfo + * @{ + */ + +/*! \brief Query additional information about the GL texture object associated + * with \a memobj. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param param_name specifies what additional information about the GL + * texture object associated with \a memobj to query: + * - CL_GL_TEXTURE_TARGET (GLenum) to query the \a target argument specified + * in clCreateGLTexture2D or clCreateGLTexture3D calls. 
 * - CL_GL_MIPMAP_LEVEL (GLint) to query the \a miplevel argument specified
 *   in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls.
 *
 * \param param_value is a pointer to memory where the appropriate result
 * being queried is returned. If \a param_value is NULL, it is ignored.
 *
 * \param param_value_size is used to specify the size in bytes of memory
 * pointed to by \a param_value. This size must be >= size of return type as
 * described for \a param_name argument (GLenum or GLint).
 * \a param_value_size_ret returns the actual size in bytes of data copied to
 * \a param_value. If \a param_value_size_ret is NULL, it is ignored.
 *
 * \return One of the following values is returned:
 * - CL_SUCCESS if the function is executed successfully.
 * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object.
 * - CL_INVALID_GL_OBJECT if there is no GL texture object (2D or 3D texture)
 *   associated with \a memobj.
 * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
 *   specified by \a param_value_size is < size of return type required by
 *   \a param_name and \a param_value is not NULL, or if \a param_value and
 *   \a param_value_size_ret are NULL.
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, + (cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + amd::GLObject* glObject = interop->asGLObject(); + if ((NULL == glObject) || (NULL != glObject->asBufferGL())) { + LogWarning("CL object \"memobj\" is not created from GL texture"); + return CL_INVALID_GL_OBJECT; + } + + switch (param_name) { + case CL_GL_TEXTURE_TARGET: { + GLenum glTarget = glObject->getGLTarget(); + if (glTarget == GL_TEXTURE_CUBE_MAP) { + glTarget = glObject->getCubemapFace(); + } + return amd::clGetInfo(glTarget, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_MIPMAP_LEVEL: { + GLint mipLevel = glObject->getGLMipLevel(); + return amd::clGetInfo(mipLevel, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_NUM_SAMPLES: { + GLsizei numSamples = glObject->getNumSamples(); + return amd::clGetInfo(numSamples, param_value_size, param_value, param_value_size_ret); + } + default: + LogWarning("Unknown param_name in clGetGLTextureInfoAMD"); + break; + } + + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueAcquireExtObjects + * @{ + */ + +/*! \brief Acquire OpenCL memory objects that have been created from external + * objects (OpenGL, D3D). + * + * \param command_queue is a valid command-queue. + * + * \param num_objects is the number of memory objects to be acquired + * in \a mem_objects. + * + * \param mem_objects is a pointer to a list of CL memory objects that refer + * to a GL object (buffer/texture/renderbuffer objects or the framebuffer). 
+ * + * \param event_wait_list specify [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. + * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from a GL object(s). 
 * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *   \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *   \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *   are not valid events.
 * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *   required by the OpenCL implementation on the host.
 *
 * \version 1.0r29
 */
RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects,
              (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  // Pure forwarder: the generic external-object acquire helper receives every
  // argument unchanged and is told to tag the command as a GL acquire. All
  // validation and the returned status come from that helper.
  return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects,
                                            num_events_in_wait_list, event_wait_list, event,
                                            CL_COMMAND_ACQUIRE_GL_OBJECTS);
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup clEnqueueReleaseGLObjects
 *  @{
 */

/*! \brief Release OpenCL memory objects that have been created from OpenGL
 * objects.
 *
 * \param command_queue is a valid command-queue [which is associated with the
 * OpenCL clContext releasing the OpenGL objects].
 *
 * \param num_objects is the number of memory objects to be released
 * in \a mem_objects.
 *
 * \param mem_objects is a pointer to a list of CL memory objects that refer
 * to a GL object (buffer/texture/renderbuffer objects or the framebuffer).
 *
 * \param event_wait_list specifies [is a pointer to] events that need to
 * complete before this particular command can be executed.
 * If \a event_wait_list is NULL, then this particular command does not wait
 * on any event to complete. If \a event_wait_list is NULL,
 * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
 * the list of events pointed to by \a event_wait_list must be valid and
 * \a num_events_in_wait_list must be greater than 0. The events specified in
 * \a event_wait_list act as synchronization points.
+ * + * \param num_events_in_wait_list specify the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from a GL object(s). + * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. 
 *
 * \version 1.0r29
 */
RUNTIME_ENTRY(cl_int, clEnqueueReleaseGLObjects,
              (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  // Pure forwarder: the generic external-object release helper receives every
  // argument unchanged and is told to tag the command as a GL release. All
  // validation and the returned status come from that helper.
  return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects,
                                            num_events_in_wait_list, event_wait_list, event,
                                            CL_COMMAND_RELEASE_GL_OBJECTS);
}
RUNTIME_EXIT

/*! @}
*  \addtogroup clCreateEventFromGLsyncKHR
*  @{
*/

/*! \brief Creates an event object linked to an OpenGL sync object.
*  Completion of such an event object is equivalent to waiting for completion
*  of the fence command associated with the linked GL sync object.
*
*  \param context is a valid OpenCL context created from an OpenGL context
*  or share group, using the cl_khr_gl_sharing extension.
*
*  \param sync is the 'name' of a sync object in the GL share group associated
*  with context.
*
*  \param errcode_ret Returns an appropriate error code as described below.
*  If errcode_ret is NULL, no error code is returned.
*
*  \return a valid OpenCL event object and errcode_ret is set to CL_SUCCESS
*  if the event object is created successfully. Otherwise, it returns a NULL
*  value with one of the following error values returned in errcode_ret:
*  - CL_INVALID_CONTEXT if context is not a valid context or was not created
*    from a GL context.
*  - CL_INVALID_GL_OBJECT if sync is not the name of a sync object in the
*    GL share group associated with context.
+* +* \version 1.1 +*/ + +RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, + (cl_context context, cl_GLsync clGLsync, cl_int* errcode_ret)) { + // create event of fence sync type + amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context)); + clglEvent->context().glenv()->glFlush_(); + // initially set the status of fence as queued + clglEvent->setStatus(CL_SUBMITTED); + // store GLsync id of the fence in event in order to associate them together + clglEvent->setData(clGLsync); + amd::Event* evt = dynamic_cast(clglEvent); + evt->retain(); + return as_cl(evt); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLContextInfoKHR + * @{ + */ + +/*! \brief This f-n is defined in CL extension cl_khr_gl_sharing and serves + * the purpose of quering current device and all devices that support + * CL-GL interoperability. + * + * \param properties points to an , which is a array of + * ordered pairs terminated with zero. If an + * attribute is not specified in , then its default value + * (listed in table 4.attr) is used (it is said to be specified + * implicitly). If is NULL or empty (points to a list + * whose first value is zero), all attributes take on their default + * values. + * + * \param param_name may accept one of the following enumerated values: + * - CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 + * - CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argumnet (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. 
 *
 * \param param_value_size is used to specify the size in bytes of memory
 * pointed to by \a param_value. This size must be >= size of return type as
 * described for \a param_name argument (GLenum or GLint).
 * \a param_value_size_ret returns the actual size in bytes of data copied to
 * \a param_value. If \a param_value_size_ret is NULL, it is ignored.
 *
 * \return one of the following values is returned:
 * - CL_SUCCESS if the function is executed successfully.
 * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the
 *   function does nothing.
 * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a
 *   NULL value or if \a num_objects > 0 and \a mem_objects is NULL.
 * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid
 *   OpenCL memory objects.
 * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not
 *   created from an OpenGL clContext.
 * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been
 *   created from a GL object(s).
 * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *   \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *   \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *   are not valid events.
 * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *   required by the OpenCL implementation on the host.
+ * - CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if + * + * \version 1.0r47 + */ +RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, + (const cl_context_properties* properties, cl_gl_context_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + cl_int errcode=0; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + amd::Context::Info info; + static const bool VALIDATE_ONLY = true; + + errcode = amd::Context::checkProperties(properties, &info); + if (CL_SUCCESS != errcode) { + return errcode; + } + + if (!(info.flags_ & amd::Context::GLDeviceKhr)) { + // No GL context is specified + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + // Get devices + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + + if (!num_gpu_devices) { + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + switch (param_name) { + case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: + // Return the CL device currently associated with the specified OpenGL context. + if (num_gpu_devices) { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + return amd::clGetInfo(device, param_value_size, param_value, param_value_size_ret); + } + } + + *not_null(param_value_size_ret) = 0; + } + break; + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: { + // List of all CL devices that can be associated with the specified OpenGL context. 
+ cl_uint total_devices = num_gpu_devices; + size_t size = total_devices * sizeof(cl_device_id); + + cl_device_id* devices = (cl_device_id*)alloca(size); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, total_devices, devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + std::vector compatible_devices; + + for (cl_uint i = 0; i < total_devices; ++i) { + cl_device_id device = devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + + size_t deviceCount = compatible_devices.size(); + size_t deviceCountSize = deviceCount * sizeof(cl_device_id); + + if (param_value != NULL && param_value_size < deviceCountSize) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = deviceCountSize; + + if (param_value != NULL) { + cl_device_id* deviceList = (cl_device_id*)param_value; + for (const auto& it : compatible_devices) { + *deviceList++ = as_cl(it); + } + } + + return CL_SUCCESS; + } break; + + default: + LogWarning("\"param_name\" is not valid"); + return CL_INVALID_VALUE; + } + return CL_SUCCESS; +} +RUNTIME_EXIT + +// +// +// namespace amd +// +// +namespace amd { + +typedef struct { + GLenum glBinding; + GLenum glTarget; +} TargetBindings_t; + +/*! @} + * \addtogroup CL-GL interop helper functions + * @{ + */ + +//! 
Function clearGLErrors() to clear all GL error bits, if any +void clearGLErrors(const Context& amdContext) { + GLenum glErr, glLastErr = GL_NO_ERROR; + while (1) { + glErr = amdContext.glenv()->glGetError_(); + if (glErr == GL_NO_ERROR || glErr == glLastErr) { + break; + } + glLastErr = glErr; + LogWarning("GL error"); + } +} + +GLenum checkForGLError(const Context& amdContext) { + GLenum glRetErr = GL_NO_ERROR; + GLenum glErr; + while (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + glRetErr = glErr; // Just return the last GL error + LogWarning("Check GL error"); + } + return glRetErr; +} + +//! Function getCLFormatFromGL returns "true" if GL format +//! is compatible with CL format, "false" otherwise. +bool getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat, + cl_image_format* pclImageFormat, int* piBytesPerPixel, cl_mem_flags flags) { + bool bRetVal = false; + + /* + Available values for "image_channel_order" + ========================================== + CL_R + CL_A + CL_INTENSITY + CL_LUMINANCE + CL_RG + CL_RA + CL_RGB + CL_RGBA + CL_ARGB + CL_BGRA + + Available values for "image_channel_data_type" + ============================================== + CL_SNORM_INT8 + CL_SNORM_INT16 + CL_UNORM_INT8 + CL_UNORM_INT16 + CL_UNORM_SHORT_565 + CL_UNORM_SHORT_555 + CL_UNORM_INT_101010 + CL_SIGNED_INT8 + CL_SIGNED_INT16 + CL_SIGNED_INT32 + CL_UNSIGNED_INT8 + CL_UNSIGNED_INT16 + CL_UNSIGNED_INT32 + CL_HALF_FLOAT + CL_FLOAT + */ + + switch (gliInternalFormat) { + case GL_RGB10_EXT: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB10_A2: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_BGR8_ATI: + case GL_BGRA8_ATI: + pclImageFormat->image_channel_order = CL_BGRA; + 
pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_ALPHA8: + pclImageFormat->image_channel_order = CL_A; + pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8: + case GL_R8UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_RG8: + case GL_RG8UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG8I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RGB8: + case GL_RGB8UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGB8I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGBA: + case GL_RGBA8: + case GL_RGBA8UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA8UI) ? 
CL_UNSIGNED_INT8 : CL_UNORM_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGBA8I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R16: + case GL_R16UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + bRetVal = true; + *piBytesPerPixel = 2; + break; + + case GL_R16I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_R16F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG16: + case GL_RG16UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16F: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB16: + case GL_RGB16UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGBA16: + case GL_RGBA16UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_R32I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG32I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32F: + 
pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGB32I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGBA32I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 16; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT32F: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT16: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_UNORM_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + case GL_DEPTH24_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_UNORM_INT24; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH32F_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 5; + bRetVal = true; + break; + 
default: + LogWarning("unsupported GL internal format"); + break; + } + amd::Image::Format imageFormat(*pclImageFormat); + if (bRetVal && !imageFormat.isSupported(amdContext, 0, flags)) { + bRetVal = false; + } + return bRetVal; +} + +void BufferGL::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +static GLenum clChannelDataTypeToGlType(cl_channel_type channel_type) { + // Pick + // GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, + // GL_UNSIGNED_INT, GL_FLOAT, GL_2_BYTES, GL_3_BYTES, GL_4_BYTES + // or GL_DOUBLE + switch (channel_type) { + case CL_SNORM_INT8: + return GL_BYTE; + case CL_SNORM_INT16: + return GL_SHORT; + case CL_UNORM_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNORM_INT16: + return GL_UNSIGNED_SHORT; + case CL_SIGNED_INT8: + return GL_BYTE; + case CL_SIGNED_INT16: + return GL_SHORT; + case CL_SIGNED_INT32: + return GL_INT; + case CL_UNSIGNED_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNSIGNED_INT16: + return GL_UNSIGNED_SHORT; + case CL_UNSIGNED_INT32: + return GL_UNSIGNED_INT; + case CL_FLOAT: + return GL_FLOAT; + case CL_UNORM_INT_101010: + return GL_UNSIGNED_INT_10_10_10_2; + case CL_HALF_FLOAT: + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + default: + guarantee(false && "Unexpected CL type."); + return 0; + } +} + +static GLenum glInternalFormatToGlFormat(GLenum internalFormat) { + switch (internalFormat) { + // Base internal formats + case GL_RGBA: + case GL_BGRA: + return internalFormat; + // Sized internal formats + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA16F: + case GL_RGBA32F: + return GL_RGBA; + case GL_RGBA8I: + case GL_RGBA8UI: + case GL_RGBA16I: + case GL_RGBA16UI: + case GL_RGBA32I: + case GL_RGBA32UI: + return GL_RGBA_INTEGER; + + default: + guarantee(false && "Unexpected GL internal format."); + return 0; + } +} + +void ImageGL::initDeviceMemory() { + 
deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(ImageGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +//******************************************************************* +// +// Internal implementation of CL API functions +// +//******************************************************************* + +// +// clCreateFromGLBufferAMD +// +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, GLuint bufobj, + cl_int* errcode_ret) { + BufferGL* pBufferGL = NULL; + GLenum glErr; + GLenum glTarget = GL_ARRAY_BUFFER; + GLint gliSize = 0; + GLint gliMapped = 0; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return (cl_mem)0; + } + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL buffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"bufobj\" is not a GL buffer object"); + return (cl_mem)0; + } + + // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it + + // Check if size is available - data store is created + + amdContext.glenv()->glBindBuffer_(glTarget, bufobj); + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot get the GL buffer size"); + return (cl_mem)0; + } + if (gliSize == 
0) { + //@todo - check why sometime the size is zero + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("the GL buffer's data store is not created"); + return (cl_mem)0; + } + + // Mapping will be done at acquire time (sync point) + + } // Release scoped lock + + // Now create BufferGL object + pBufferGL = new (amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj); + + if (!pBufferGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + if (!pBufferGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + + // Create interop object + if (pBufferGL->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot create object of class BufferGL"); + // Same cleanup as the create() failure path above; without it the object is leaked + pBufferGL->release(); + return (cl_mem)0; + } + + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. 
+ assert(amdContext.devices().size() == 1); + + const auto it = amdContext.devices().cbegin(); + const amd::Device& dev = *(*it); + + device::Memory* mem = pBufferGL->getDeviceMemory(dev); + if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pBufferGL->getSize()); + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + // The caller only gets a null handle back, so nobody else can release the object + pBufferGL->release(); + return (cl_mem)0; + } + mem->processGLResource(device::Memory::GLDecompressResource); + + return as_cl(pBufferGL); +} + +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags clFlags, GLenum target, + GLint miplevel, GLuint texture, int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + GLenum glTarget = 0; + GLenum glInternalFormat; + cl_image_format clImageFormat; + uint dim = 1; + cl_mem_object_type clType; + cl_gl_object_type clGLType; + GLsizei numSamples = 1; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return static_cast(0); + } + + GLint gliTexWidth = 1; + GLint gliTexHeight = 1; + GLint gliTexDepth = 1; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL texture object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"texture\" is not a GL texture object"); + return static_cast(0); + } + + bool image = true; + + // Check target value validity + switch (target) { + case GL_TEXTURE_BUFFER: + glTarget = GL_TEXTURE_BUFFER; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; + clGLType = 
CL_GL_OBJECT_TEXTURE_BUFFER; + image = false; + break; + + case GL_TEXTURE_1D: + glTarget = GL_TEXTURE_1D; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D; + clGLType = CL_GL_OBJECT_TEXTURE1D; + break; + + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + glTarget = GL_TEXTURE_CUBE_MAP; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_1D_ARRAY: + glTarget = GL_TEXTURE_1D_ARRAY; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + + case GL_TEXTURE_2D: + glTarget = GL_TEXTURE_2D; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_MULTISAMPLE: + glTarget = GL_TEXTURE_2D_MULTISAMPLE; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_RECTANGLE_ARB: + glTarget = GL_TEXTURE_RECTANGLE_ARB; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_ARRAY: + glTarget = GL_TEXTURE_2D_ARRAY; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + + case GL_TEXTURE_3D: + glTarget = GL_TEXTURE_3D; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE3D; + clGLType = CL_GL_OBJECT_TEXTURE3D; + break; + + default: + // wrong value + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + break; + } + + amdContext.glenv()->glBindTexture_(glTarget, texture); + + // Check if size is available - data store is created + if (image) { + // Check mipmap level for "texture" name + GLint gliTexBaseLevel; + GLint gliTexMaxLevel; + + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, 
&gliTexBaseLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); + return static_cast(0); + } + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, + (GLint*)&numSamples); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get numbers of samples of GL \"texture\" object"); + return static_cast(0); + } + if (numSamples > 1) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("MSAA \"texture\" object is not suppoerted for the device"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + 
clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + switch (dim) { + case 3: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, + &gliTexDepth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... + case 2: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, + &gliTexHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... 
+ case 1: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, + &gliTexWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + } + } else { + GLint size; + + // In case target is GL_TEXTURE_BUFFER + GLint backingBuffer; + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_( + glTarget, 0, GL_TEXTURE_BUFFER_DATA_STORE_BINDING, &backingBuffer); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get backing buffer for GL \"texture buffer\" object"); + return static_cast(0); + } + amdContext.glenv()->glBindBuffer_(glTarget, backingBuffer); + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT_EXT, + reinterpret_cast(&glInternalFormat)); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); + if (GL_NO_ERROR != (glErr = 
amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + gliTexWidth = size / iBytesPerPixel; + } + size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? static_cast(gliTexHeight) + : static_cast(gliTexDepth); + + if (!amd::Image::validateDimensions( + amdContext.devices(), clType, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), imageSize)) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); + return static_cast(0); + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + target = (glTarget == GL_TEXTURE_CUBE_MAP) ? target : 0; + + pImageGL = new (amdContext) + ImageGL(amdContext, clType, clFlags, clImageFormat, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), glTarget, + texture, miplevel, glInternalFormat, clGLType, numSamples, target); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL - out of memory?"); + return static_cast(0); + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return static_cast(0); + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clCreateFromGLRenderbufferDAMD +// +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags clFlags, GLuint renderbuffer, + int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + + GLenum glTarget = GL_RENDERBUFFER; + GLenum glInternalFormat; + cl_image_format clImageFormat; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is 
not created from GL context or share list"); + return (cl_mem)0; + } + + GLint gliRbWidth; + GLint gliRbHeight; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL renderbuffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"renderbuffer\" is not a GL texture object"); + return (cl_mem)0; + } + + amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer); + + // Get GL RB format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of GL \"renderbuffer\" object"); + return (cl_mem)0; + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format"); + return (cl_mem)0; + } + + // Check if size is available - data store is created + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_WIDTH, + &gliRbWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of GL \"renderbuffer\""); + return 
(cl_mem)0; + } + if (gliRbWidth == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_HEIGHT, + &gliRbHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of GL \"renderbuffer\""); + return (cl_mem)0; + } + if (gliRbHeight == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + pImageGL = + new (amdContext) ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat, + (size_t)gliRbWidth, (size_t)gliRbHeight, 1, glTarget, renderbuffer, + 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?"); + return (cl_mem)0; + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clEnqueueAcquireExtObjectsAMD +// + +static cl_int clSetInteropObjects(cl_uint num_objects, const cl_mem* mem_objects, + std::vector& interopObjects) { + if ((num_objects == 0 && mem_objects != NULL) || (num_objects != 0 && mem_objects == NULL)) { + return CL_INVALID_VALUE; + } + + while (num_objects-- > 0) { + cl_mem obj = *mem_objects++; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Memory* mem = as_amd(obj); + if (mem->getInteropObj() == NULL) { + return CL_INVALID_GL_OBJECT; + } + + interopObjects.push_back(mem); + } + return CL_SUCCESS; +} + +cl_int 
clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) { + // Verify context init'ed for interop + if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { + LogWarning("\"amdContext\" is not created from GL context or share list"); + return CL_INVALID_CONTEXT; + } + } + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! Make sure D3D10 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { + SyncD3D10Objects(memObjects); + } + //! Make sure D3D11 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) { + SyncD3D11Objects(memObjects); + } + //! Make sure D3D9 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) { + SyncD3D9Objects(memObjects); + } + } +#endif //_WIN32 + + //! 
Now create command and enqueue + amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + + +// +// clEnqueueReleaseExtObjectsAMD +// +cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + //! Now create command and enqueue + amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! 
Make sure CL command queue is flushed and all commands are finished + //! before D3D10 side would access interop resources + if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) { + command->awaitCompletion(); + } + } +#endif //_WIN32 + + *not_null(event) = as_cl(&command->event()); + + if (event == NULL) { + command->release(); + } + + return CL_SUCCESS; +} + +// Placed here as opposed to command.cpp, as glext.h and cl_gl_amd.hpp will have +// to be included because of the GL calls +// Waits for the GL fence stored in the command's data. Returns true and marks the event +// CL_COMPLETE once the fence is signalled; returns false if there is no fence, the internal +// GL context cannot be set up, or the wait does not report a signalled fence. +bool ClGlEvent::waitForFence() { + GLenum ret; + // get fence id associated with fence event + GLsync gs = reinterpret_cast(command().data()); + if (!gs) return false; + +// Try to use DC and GLRC of current thread, if it doesn't exist +// create a new GL context on this thread, which is shared with the original context + +#ifdef _WIN32 + HDC tempDC_ = wglGetCurrentDC(); + HGLRC tempGLRC_ = wglGetCurrentContext(); + // Set DC and GLRC + if (tempDC_ && tempGLRC_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + tempDC_ = context().glenv()->getDC(); + tempGLRC_ = context().glenv()->getIntGLRC(); + if (!context().glenv()->init(reinterpret_cast(tempDC_), + reinterpret_cast(tempGLRC_))) + return false; + + // Make the newly created GL context current to this thread + if (!context().glenv()->setIntEnv()) return false; + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. Done before the result check so a failed wait cannot leave the internal context bound + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#else // Lnx + Display* tempDpy_ = 
context().glenv()->glXGetCurrentDisplay_(); + GLXDrawable tempDrawable_ = context().glenv()->glXGetCurrentDrawable_(); + GLXContext tempCtx_ = context().glenv()->glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ && tempCtx_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + if (!context().glenv()->init(reinterpret_cast(context().glenv()->getIntDpy()), + reinterpret_cast(context().glenv()->getIntCtx()))) + return false; + + // Make the newly created GL context current to this thread + if (!context().glenv()->setIntEnv()) return false; + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. Done before the result check so a failed wait cannot leave the internal context bound + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#endif + // If we reach this point, fence should have completed + setStatus(CL_COMPLETE); + return true; +} + +// +// GLFunctions implementation +// + +#ifdef _WIN32 +#define CONVERT_CHAR_GLUBYTE +#else //!_WIN32 +#define CONVERT_CHAR_GLUBYTE (GLubyte*) +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + if (!(fcn##_ = (PFN_##fcn)GETPROCADDRESS(libHandle_, #fcn))) { \ + if (!(fcn##_ = (PFN_##fcn)GetProcAddress_(reinterpret_cast(#fcn)))) ++missed_; \ + } + +GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) : env_(env) { + env_->getLock().lock(); + + // Set environment (DC and GLRC) + isValid_ = env_->setIntEnv(); +} + +GLFunctions::SetIntEnv::~SetIntEnv() { + // Restore environment (CL DC and CL GLRC) + env_->restoreEnv(); + + env_->getLock().unlock(); +} + +GLFunctions::GLFunctions(HMODULE h, bool isEGL) + : libHandle_(h), + missed_(0), + eglDisplay_(EGL_NO_DISPLAY), + 
eglOriginalContext_(EGL_NO_CONTEXT), + eglInternalContext_(EGL_NO_CONTEXT), + eglTempContext_(EGL_NO_CONTEXT), + isEGL_(isEGL), +#ifdef _WIN32 + hOrigGLRC_(0), + hDC_(0), + hIntGLRC_(0) +#else //!_WIN32 + Dpy_(0), + Drawable_(0), + origCtx_(0), + intDpy_(0), + intDrawable_(0), + intCtx_(0), + XOpenDisplay_(NULL), + XCloseDisplay_(NULL), + glXGetCurrentDrawable_(NULL), + glXGetCurrentDisplay_(NULL), + glXGetCurrentContext_(NULL), + glXChooseVisual_(NULL), + glXCreateContext_(NULL), + glXDestroyContext_(NULL), + glXMakeCurrent_(NULL) +#endif //!_WIN32 +{ +#define VERIFY_POINTER(p) \ + if (NULL == p) { \ + missed_++; \ + } + + if (isEGL_) { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, "eglGetProcAddress"); + } else { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, API_GETPROCADDR); + } +#ifndef _WIN32 + // Initialize pointers to X11/GLX functions + // We can not link with these functions on compile time since we need to support + // console mode. In console mode X server and X server components may be absent. 
+ // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen + + if (!isEGL_) { + glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h, "glXGetCurrentDrawable"); + VERIFY_POINTER(glXGetCurrentDrawable_) + glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h, "glXGetCurrentDisplay"); + VERIFY_POINTER(glXGetCurrentDisplay_) + glXGetCurrentContext_ = (PFNglXGetCurrentContext)GETPROCADDRESS(h, "glXGetCurrentContext"); + VERIFY_POINTER(glXGetCurrentContext_) + glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h, "glXChooseVisual"); + VERIFY_POINTER(glXChooseVisual_) + glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h, "glXCreateContext"); + VERIFY_POINTER(glXCreateContext_) + glXDestroyContext_ = (PFNglXDestroyContext)GETPROCADDRESS(h, "glXDestroyContext"); + VERIFY_POINTER(glXDestroyContext_) + glXMakeCurrent_ = (PFNglXMakeCurrent)GETPROCADDRESS(h, "glXMakeCurrent"); + VERIFY_POINTER(glXMakeCurrent_) + + HMODULE hXModule = (HMODULE)Os::loadLibrary("libX11.so.6"); + if (NULL != hXModule) { + XOpenDisplay_ = (PFNXOpenDisplay)GETPROCADDRESS(hXModule, "XOpenDisplay"); + VERIFY_POINTER(XOpenDisplay_) + XCloseDisplay_ = (PFNXCloseDisplay)GETPROCADDRESS(hXModule, "XCloseDisplay"); + VERIFY_POINTER(XCloseDisplay_) + } else { + missed_ += 2; + } + } +// Initialize pointers to GL functions +#include "gl_functions.hpp" +#else + if (!isEGL_) { + wglCreateContext_ = (PFN_wglCreateContext)GETPROCADDRESS(h, "wglCreateContext"); + VERIFY_POINTER(wglCreateContext_) + wglGetCurrentContext_ = (PFN_wglGetCurrentContext)GETPROCADDRESS(h, "wglGetCurrentContext"); + VERIFY_POINTER(wglGetCurrentContext_) + wglGetCurrentDC_ = (PFN_wglGetCurrentDC)GETPROCADDRESS(h, "wglGetCurrentDC"); + VERIFY_POINTER(wglGetCurrentDC_) + wglDeleteContext_ = (PFN_wglDeleteContext)GETPROCADDRESS(h, "wglDeleteContext"); + VERIFY_POINTER(wglDeleteContext_) + wglMakeCurrent_ = (PFN_wglMakeCurrent)GETPROCADDRESS(h, "wglMakeCurrent"); + 
VERIFY_POINTER(wglMakeCurrent_) + wglShareLists_ = (PFN_wglShareLists)GETPROCADDRESS(h, "wglShareLists"); + VERIFY_POINTER(wglShareLists_) + } +#endif +} + +GLFunctions::~GLFunctions() { +#ifdef _WIN32 + if (hIntGLRC_) { + if (!wglDeleteContext_(hIntGLRC_)) { + DWORD dwErr = GetLastError(); + LogWarning("Cannot delete GLRC"); + } + } +#else //!_WIN32 + if (intDpy_) { + if (intCtx_) { + glXDestroyContext_(intDpy_, intCtx_); + intCtx_ = NULL; + } + XCloseDisplay_(intDpy_); + intDpy_ = NULL; + } +#endif //!_WIN32 +} + +// Creates the internal GL context that shares objects with the caller's context (hglrc). +// For EGL only the display and context handles are recorded. Returns false when required +// entry points are missing or the internal display/context cannot be created. +bool GLFunctions::init(intptr_t hdc, intptr_t hglrc) { + if (isEGL_) { + eglDisplay_ = (EGLDisplay)hdc; + eglOriginalContext_ = (EGLContext)hglrc; + return true; + } + +#ifdef _WIN32 + DWORD err; + + if (missed_) { + return false; + } + + if (!hdc) { + hDC_ = wglGetCurrentDC_(); + } else { + hDC_ = (HDC)hdc; + } + hOrigGLRC_ = (HGLRC)hglrc; + if (!(hIntGLRC_ = wglCreateContext_(hDC_))) { + err = GetLastError(); + return false; + } + if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { + err = GetLastError(); + return false; + } + + bool makeCurrentNull = false; + + if (wglGetCurrentContext_() == NULL) { + wglMakeCurrent_(hDC_, hIntGLRC_); + + makeCurrentNull = true; + } + +// Initialize pointers to GL functions +#include "gl_functions.hpp" + + if (makeCurrentNull) { + wglMakeCurrent_(NULL, NULL); + } + + if (missed_ == 0) { + return true; + } +#else //!_WIN32 + if (!missed_) { + if (!hdc) { + Dpy_ = glXGetCurrentDisplay_(); + } else { + Dpy_ = (Display*)hdc; + } + if (!Dpy_) { + // No display was passed in and none is current: DisplayString() below would dereference NULL + return false; + } + Drawable_ = glXGetCurrentDrawable_(); + origCtx_ = (GLXContext)hglrc; + + int attribList[] = {GLX_RGBA, None}; + if (!(intDpy_ = XOpenDisplay_(DisplayString(Dpy_)))) { +#if defined(ATI_ARCH_X86) + asm("int $3"); +#endif + // DefaultRootWindow()/DefaultScreen() below dereference the display, so stop here + return false; + } + intDrawable_ = DefaultRootWindow(intDpy_); + + XVisualInfo* vis; + int defaultScreen = DefaultScreen(intDpy_); + if (!(vis = glXChooseVisual_(intDpy_, defaultScreen, attribList))) { + return false; + } + if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { + 
return false; + } + return true; + } +#endif //!_WIN32 + return false; +} + +// Saves whatever GL context is current on this thread and makes the internal one current. +// Returns false if the internal context cannot be made current; always true for EGL. +bool GLFunctions::setIntEnv() { + if (isEGL_) { + return true; + } +#ifdef _WIN32 + // Save current DC and GLRC + tempDC_ = wglGetCurrentDC_(); + tempGLRC_ = wglGetCurrentContext_(); + // Set internal DC and GLRC + if (tempDC_ != getDC() || tempGLRC_ != getIntGLRC()) { + if (!wglMakeCurrent_(getDC(), getIntGLRC())) { + DWORD err = GetLastError(); + LogWarning("cannot set internal GL environment"); + return false; + } + } +#else //!_WIN32 + tempDpy_ = glXGetCurrentDisplay_(); + tempDrawable_ = glXGetCurrentDrawable_(); + tempCtx_ = glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ != getDpy() || tempCtx_ != getIntCtx()) { + if (!glXMakeCurrent_(getIntDpy(), getIntDrawable(), getIntCtx())) { + LogWarning("cannot set internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +// Makes the context saved by setIntEnv() current again. On GLX, when nothing was current +// before, the internal context is simply unbound. Returns false if the switch fails. +bool GLFunctions::restoreEnv() { + if (isEGL_) { + // eglMakeCurrent( ); + return true; + } +#ifdef _WIN32 + // Restore original DC and GLRC + if (!wglMakeCurrent_(tempDC_, tempGLRC_)) { + DWORD err = GetLastError(); + LogWarning("cannot restore original GL environment"); + return false; + } +#else //!_WIN32 + // Restore Display and GLXContext + if (tempDpy_) { + if (!glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) { + LogWarning("cannot restore original GL environment"); + return false; + } + } else { + // Just release internal context + if (!glXMakeCurrent_(getIntDpy(), None, NULL)) { + LogWarning("cannot release internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +} // namespace amd diff --git a/vdi/cl_gl_amd.hpp b/vdi/cl_gl_amd.hpp new file mode 100644 index 0000000000..36831fa747 --- /dev/null +++ b/vdi/cl_gl_amd.hpp @@ -0,0 +1,379 @@ +/* Copyright (c) 2010-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_GL_AMD_HPP_ +#define CL_GL_AMD_HPP_ + +#ifdef _WIN32 +#include +#else //!_WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include "CL/cl_gl.h" +#ifndef _WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include + +#include "platform/context.hpp" +#include "platform/command.hpp" + +namespace amd +{ + +//! Class GLObject keeps all the info about the GL object +//! from which the CL object is created +class GLObject : public InteropObject +{ +protected: + cl_gl_object_type clGLType_; //!< CL GL object type + GLenum glTarget_; + GLuint gluiName_; + GLint gliMipLevel_; + GLenum glInternalFormat_; + GLint gliWidth_; + GLint gliHeight_; + GLint gliDepth_; + GLenum glCubemapFace_; + GLsizei glNumSamples_; + +public: +//! 
GLObject constructor initializes member variables + GLObject( + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + GLint gliWidth, + GLint gliHeight, + GLint gliDepth, + cl_gl_object_type clGLType, + GLenum glCubemapFace, + GLsizei glNumSamples + ): // Initialization of member variables + clGLType_(clGLType), + glTarget_(glTarget), + gluiName_(gluiName), + gliMipLevel_(gliMipLevel), + glInternalFormat_(glInternalFormat), + gliWidth_(gliWidth), + gliHeight_(gliHeight), + gliDepth_(gliDepth), + glCubemapFace_(glCubemapFace), + glNumSamples_(glNumSamples) + { + } + + virtual ~GLObject() {} + virtual GLObject* asGLObject() {return this;} + +//! GLObject query functions to get GL info from member variables + GLenum getGLTarget() const {return glTarget_;} + GLuint getGLName() const {return gluiName_;} + GLint getGLMipLevel() const {return gliMipLevel_;} + GLenum getGLInternalFormat() const {return glInternalFormat_;} + GLint getGLSize() const {return gliWidth_;} + GLint getGLWidth() const {return gliWidth_;} + GLint getGLHeight() const {return gliHeight_;} + GLint getGLDepth() const {return gliDepth_;} + cl_gl_object_type getCLGLObjectType() const { return clGLType_; } + GLenum getCubemapFace() const {return glCubemapFace_;} + GLsizei getNumSamples() const { return glNumSamples_;} +}; + + +//! Class BufferGL is drived from classes Buffer and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class BufferGL : public Buffer, public GLObject +{ +protected: + //! Initializes the device memory array which is nested + // after'BufferGL' object in memory layout. + virtual void initDeviceMemory(); +public: +//! BufferGL constructor just calls constructors of base classes +//! 
to pass down the parameters + BufferGL( + Context& amdContext, + cl_mem_flags clFlags, + size_t uiSizeInBytes, + GLenum glTarget, + GLuint gluiName) + : // Call base classes constructors + Buffer( + amdContext, + clFlags, + uiSizeInBytes + ), + GLObject( + glTarget, + gluiName, + 0, // Mipmap level default + GL_ARRAY_BUFFER, // Just init to some value + (GLint) uiSizeInBytes, + 1, + 1, + CL_GL_OBJECT_BUFFER, + 0, + 0 + ) + { + setInteropObj(this); + } + virtual ~BufferGL() {} + + virtual BufferGL* asBufferGL() { return this; } +}; + + +//! Class ImageGL is derived from classes Image and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class ImageGL : public Image, public GLObject +{ +public: + //! ImageGL constructor just calls constructors of base classes + //! to pass down the parameters + ImageGL( + Context& amdContext, + cl_mem_object_type clType, + cl_mem_flags clFlags, + const Format& format, + size_t width, + size_t height, + size_t depth, + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + cl_gl_object_type clGLType, + GLsizei numSamples, + GLenum glCubemapFace = 0) + : Image(amdContext, clType, clFlags, format, width, height, depth, + Format(format).getElementSize() * width, + Format(format).getElementSize() * width * depth) + , GLObject(glTarget, gluiName, gliMipLevel, glInternalFormat, + static_cast(width), static_cast(height), + static_cast(depth), clGLType, glCubemapFace,numSamples) + { + setInteropObj(this); + } + + virtual ~ImageGL() {} + +protected: + //! Initializes the device memory array which is nested + // after'BufferGL' object in memory layout. 
+ virtual void initDeviceMemory(); +}; + +#ifdef _WIN32 +#define APICALL WINAPI +#define GETPROCADDRESS GetProcAddress +#define API_GETPROCADDR "wglGetProcAddress" +#define FCN_STR_TYPE LPCSTR + typedef PROC (WINAPI* PFN_xxxGetProcAddress) (LPCSTR fcnName); + typedef HGLRC (APICALL* PFN_wglCreateContext) (HDC hdc); + typedef HGLRC (APICALL* PFN_wglGetCurrentContext) (void); + typedef HDC (APICALL* PFN_wglGetCurrentDC) (void); + typedef BOOL (APICALL* PFN_wglDeleteContext) (HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglMakeCurrent) (HDC hdc, HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglShareLists) (HGLRC hglrc1, HGLRC hglrc2); +#else //!_WIN32 +#define APICALL // __stdcall //??? todo odintsov +#define API_GETPROCADDR "glXGetProcAddress" +#define GETPROCADDRESS dlsym +#define FCN_STR_TYPE const GLubyte* +#define WINAPI +#define PROC void* + typedef void* (*PFN_xxxGetProcAddress) (const GLubyte* procName); + // X11 typedef + typedef Display* (*PFNXOpenDisplay)(_Xconst char* display_name ); + typedef int (*PFNXCloseDisplay)(Display* display ); + + //glx typedefs + typedef GLXDrawable (*PFNglXGetCurrentDrawable)(); + typedef Display* (*PFNglXGetCurrentDisplay)(); + typedef GLXContext (*PFNglXGetCurrentContext)( void ); + typedef XVisualInfo* (*PFNglXChooseVisual)(Display *dpy, int screen, int *attribList); + typedef GLXContext(*PFNglXCreateContext)(Display* dpy,XVisualInfo* vis,GLXContext shareList,Bool direct); + typedef void(*PFNglXDestroyContext)(Display* dpy, GLXContext ctx); + typedef Bool(*PFNglXMakeCurrent)( Display* dpy, GLXDrawable drawable, GLXContext ctx); + typedef void* HMODULE; +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + typedef rtype (APICALL* PFN_##fcn) dclargs; + +// Declare prototypes for GL functions +#include "gl_functions.hpp" + +class GLFunctions +{ +public: + //! Locks any access to the virtual GPUs + class SetIntEnv : public amd::StackObject { + public: + //! Default constructor + SetIntEnv(GLFunctions* env); + + //! 
Destructor + ~SetIntEnv(); + + //! Checks if the environment setup was successful + bool isValid() const { return isValid_; } + + private: + GLFunctions* env_; //!< GL environment + bool isValid_; //!< If TRUE, then it's a valid setup + }; + +private: + HMODULE libHandle_; + int missed_; // Indicates how many GL functions not init'ed, if any + + amd::Monitor lock_; + + EGLDisplay eglDisplay_; + EGLContext eglOriginalContext_; + EGLContext eglInternalContext_; + EGLContext eglTempContext_; + bool isEGL_; + +#ifdef _WIN32 + HGLRC hOrigGLRC_; + HDC hDC_; + HGLRC hIntGLRC_; // handle for internal GLRC to access shared context + HDC tempDC_; + HGLRC tempGLRC_; + + PFN_wglCreateContext wglCreateContext_; + PFN_wglGetCurrentContext wglGetCurrentContext_; + PFN_wglGetCurrentDC wglGetCurrentDC_; + PFN_wglDeleteContext wglDeleteContext_; + PFN_wglMakeCurrent wglMakeCurrent_; + PFN_wglShareLists wglShareLists_; +#else +public: + Display* Dpy_; + GLXDrawable Drawable_; + GLXContext origCtx_; + Display* intDpy_; + Window intDrawable_; + GLXContext intCtx_; + Display* tempDpy_; + GLXDrawable tempDrawable_; + GLXContext tempCtx_; + + //pointers to X11 functions + PFNXOpenDisplay XOpenDisplay_; + PFNXCloseDisplay XCloseDisplay_; + + //pointers to GLX functions + PFNglXGetCurrentDrawable glXGetCurrentDrawable_; + PFNglXGetCurrentDisplay glXGetCurrentDisplay_; + PFNglXGetCurrentContext glXGetCurrentContext_; + PFNglXChooseVisual glXChooseVisual_; + PFNglXCreateContext glXCreateContext_; + PFNglXDestroyContext glXDestroyContext_; + PFNglXMakeCurrent glXMakeCurrent_; +#endif +public: + + GLFunctions(HMODULE h, bool isEGL); + ~GLFunctions(); + + // Query CL-GL context association + bool isAssociated() const + { + if (isEGL_ && eglDisplay_ && eglOriginalContext_) return true; +#ifdef _WIN32 + if(hDC_ && hOrigGLRC_) return true; +#else //!_WIN32 + if(Dpy_ && origCtx_) return true; +#endif //!_WIN32 + return false; + } + bool isEGL() const + { + return isEGL_; + } + // Accessor methods 
+#ifdef _WIN32 + HGLRC getOrigGLRC() const {return hOrigGLRC_;} + HDC getDC() const {return hDC_;} + HGLRC getIntGLRC() const {return hIntGLRC_;} +#else //!_WIN32 + Display* getDpy() const {return Dpy_;} + GLXDrawable getDrawable() const {return Drawable_;} + GLXContext getOrigCtx() const {return origCtx_;} + + Display* getIntDpy() const {return intDpy_;} + GLXDrawable getIntDrawable() const {return intDrawable_;} + GLXContext getIntCtx() const {return intCtx_;} + + EGLDisplay getEglDpy() const { return eglDisplay_; } + EGLContext getEglOrigCtx() const { return eglOriginalContext_; } +#endif //!_WIN32 + + // Initialize GL dynamic library and function pointers + bool init(intptr_t hdc, intptr_t hglrc); + + // Return true if successful, false - if error occurred + bool setIntEnv(); + bool restoreEnv(); + + amd::Monitor& getLock() { return lock_; } + + PFN_xxxGetProcAddress GetProcAddress_; + +#define GLPREFIX(rtype, fcn, dclargs) \ + PFN_##fcn fcn##_; +// Declare pointers to GL functions +#include "gl_functions.hpp" +}; + +//! Functions for executing the GL related stuff +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint bufobj, cl_int* errcode_ret); +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags flags, + GLenum target, GLint miplevel, GLuint texture, int* errcode_ret); +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint renderbuffer, int* errcode_ret); + +bool +getCLFormatFromGL( + const Context& amdContext, + GLint gliInternalFormat, + cl_image_format* pclImageFormat, + int* piBytesPerPixel, + cl_mem_flags flags +); + +} //namespace amd + +#endif //CL_GL_AMD_HPP_ diff --git a/vdi/cl_lqdflash_amd.cpp b/vdi/cl_lqdflash_amd.cpp new file mode 100644 index 0000000000..b7bea3db2b --- /dev/null +++ b/vdi/cl_lqdflash_amd.cpp @@ -0,0 +1,310 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "cl_common.hpp" +#include + +#include "platform/object.hpp" + +#include "cl_lqdflash_amd.h" + +#if (!defined(BUILD_HSA_TARGET) && defined(WITH_HSA_DEVICE) && \ + defined(WITH_AMDGPU_PRO)) || defined(_WIN32) || defined(WITH_PAL_DEVICE) +#define WITH_LIQUID_FLASH 1 +#endif // _WIN32 + +#if defined(WITH_LIQUID_FLASH) +#include "lf.h" +#include +#include +#endif // WITH_LIQUID_FLASH + +namespace amd { + +LiquidFlashFile::~LiquidFlashFile() { close(); } + +bool LiquidFlashFile::open() { +#if defined WITH_LIQUID_FLASH + lf_status err; + lf_file_flags flags = 0; + + switch (flags_) { + case CL_FILE_READ_ONLY_AMD: + flags = LF_READ; + break; + case CL_FILE_WRITE_ONLY_AMD: + flags = LF_WRITE; + break; + case CL_FILE_READ_WRITE_AMD: + flags = LF_READ | LF_WRITE; + break; + } +#ifdef ATI_OS_LINUX + assert(sizeof(wchar_t) != sizeof(lf_char)); + std::string name_char; + std::wstring_convert, wchar_t> cv; + name_char = cv.to_bytes(name_); + handle_ = lfOpenFile(name_char.c_str(), flags, &err); +#else + handle_ = lfOpenFile(name_.c_str(), flags, &err); +#endif + + if (err != lf_success) { + return false; + } + + if (lfGetFileBlockSize((lf_file)handle_, &blockSize_) != lf_success) { + return false; + } + + if (lfGetFileSize((lf_file)handle_, &fileSize_) != lf_success) { + return false; + } + return true; +#else + return false; +#endif // WITH_LIQUID_FLASH +} + +void LiquidFlashFile::close() { +#if defined WITH_LIQUID_FLASH + if (handle_ != NULL) { + lfReleaseFile((lf_file)handle_); + handle_ = NULL; + } +#endif // WITH_LIQUID_FLASH +} + +bool LiquidFlashFile::transferBlock(bool writeBuffer, void* srcDst, uint64_t bufferSize, + uint64_t fileOffset, uint64_t bufferOffset, + uint64_t size) const { +#if defined WITH_LIQUID_FLASH + lf_status status; + + lf_region_descriptor region = {fileOffset / blockSize(), bufferOffset / blockSize(), + size / blockSize()}; + if (writeBuffer) { + status = lfReadFile(srcDst, bufferSize, (lf_file)handle_, 1, ®ion, NULL); + } else { + 
status = lfWriteFile(srcDst, bufferSize, (lf_file)handle_, 1, ®ion, NULL); + } + if (lf_success == status) { + return true; + } else { + return false; + } +#else + return false; +#endif // WITH_LIQUID_FLASH +} + +} // namespace amd + +/*! \addtogroup API + * @{ + * + * \addtogroup AMD_Extensions + * @{ + * + */ + +RUNTIME_ENTRY_RET(cl_file_amd, clCreateSsgFileObjectAMD, + (cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret)) { +#if defined WITH_LIQUID_FLASH && defined ATI_OS_LINUX + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return (cl_file_amd)0; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (auto& dev : devices) { + if (lf_success == lfCheckExtensionSupportForDevice(dev->info().pcieDeviceId_, + dev->info().pcieRevisionId_)) { + supportPass = true; + break; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + LogWarning("SSG isn't supported"); + return (cl_file_amd)0; + } +#endif + amd::LiquidFlashFile* file = new amd::LiquidFlashFile(file_name, flags); + + if (file == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_file_amd)0; + } + + if (!file->open()) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + delete file; + return (cl_file_amd)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(file); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clGetSsgFileObjectInfoAMD, + (cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + + switch (param_name) { + case CL_FILE_BLOCK_SIZE_AMD: { + cl_uint blockSize = as_amd(file)->blockSize(); + return amd::clGetInfo(blockSize, param_value_size, param_value, param_value_size_ret); + } + case CL_FILE_SIZE_AMD: { + cl_ulong fileSize = as_amd(file)->fileSize(); + return 
amd::clGetInfo(fileSize, param_value_size, param_value, param_value_size_ret); + } + default: + break; + } + + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clRetainSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->retain(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clReleaseSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->release(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +static cl_int EnqueueTransferBufferFromSsgFileAMD( + cl_bool isWrite, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* pBuffer = as_amd(buffer)->asBuffer(); + if (pBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (pBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != pBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + + amd::LiquidFlashFile* amdFile = as_amd(file); + amd::Coord3D bufferOffset(buffer_offset, 0, 0); + amd::Coord3D bufferSize(cb, 1, 1); + + if ((!pBuffer->validateRegion(bufferOffset, bufferSize)) || + // LF library supports aligned sizes only + ((buffer_offset % amdFile->blockSize()) != 0) || ((cb % amdFile->blockSize()) != 0) || + ((file_offset % amdFile->blockSize()) != 0)) { + return CL_INVALID_VALUE; + } + + 
amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + amd::TransferBufferFileCommand* command; + command = new amd::TransferBufferFileCommand( + isWrite ? CL_COMMAND_READ_SSG_FILE_AMD : CL_COMMAND_WRITE_SSG_FILE_AMD, hostQueue, + eventWaitList, *pBuffer, bufferOffset, bufferSize, amdFile, file_offset); + + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + +RUNTIME_ENTRY(cl_int, clEnqueueReadSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_TRUE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, event_wait_list, event); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clEnqueueWriteSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_FALSE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, event_wait_list, event); +} +RUNTIME_EXIT diff --git a/vdi/cl_lqdflash_amd.h b/vdi/cl_lqdflash_amd.h new file mode 100644 index 0000000000..5a3e725b4c --- /dev/null +++ 
b/vdi/cl_lqdflash_amd.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef __CL_LQDFLASH_AMD_H +#define __CL_LQDFLASH_AMD_H + +#include "CL/cl_ext.h" + +#ifdef __cplusplus +extern "C" { +#endif /*__cplusplus*/ + +extern CL_API_ENTRY cl_file_amd CL_API_CALL +clCreateSsgFileObjectAMD(cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL clGetSsgFileObjectInfoAMD( + cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL clRetainSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} /*extern "C"*/ +#endif /*__cplusplus*/ + +#endif diff --git a/vdi/fixme.cpp b/vdi/fixme.cpp new file mode 100644 index 0000000000..90f034f63e --- /dev/null +++ b/vdi/fixme.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "vdi_common.hpp" +#include + +cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/vdi/hip_activity.cpp b/vdi/hip_activity.cpp new file mode 100644 index 0000000000..d3ce84bfd9 --- /dev/null +++ b/vdi/hip_activity.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "platform/activity.hpp" + +extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) { + activity_prof::CallbacksTable::init(reinterpret_cast(id_callback), + reinterpret_cast(op_callback), + arg); +} + +extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) { + return activity_prof::CallbacksTable::SetEnabled(op, enable); +} + +extern "C" const char* hipGetCmdName(unsigned op) { + return getOclCommandKindString(static_cast(op)); +} diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp new file mode 100644 index 0000000000..440c3f4b47 --- /dev/null +++ b/vdi/hip_context.cpp @@ -0,0 +1,373 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "platform/runtime.hpp" +#include "utils/flags.hpp" +#include "utils/versions.hpp" + +std::vector g_devices; + +namespace hip { + +thread_local Device* g_device = nullptr; +thread_local std::stack g_ctxtStack; +thread_local hipError_t g_lastError = hipSuccess; +std::once_flag g_ihipInitialized; +Device* host_device = nullptr; + +void init() { + if (!amd::Runtime::initialized()) { + amd::IS_HIP = true; + GPU_NUM_MEM_DEPENDENCY = 0; + amd::Runtime::init(); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + for (unsigned int i=0; i device(1, devices[i]); + amd::Context* context = new amd::Context(device, amd::Context::Info()); + if (!context) return; + + // Enable active wait on the device by default + devices[i]->SetActiveWait(true); + + if (context && CL_SUCCESS != context->create(nullptr)) { + context->release(); + } else { + g_devices.push_back(new Device(context, i)); + } + } + + amd::Context* hContext = new amd::Context(devices, amd::Context::Info()); + if (!hContext) return; + + if (CL_SUCCESS != hContext->create(nullptr)) { + hContext->release(); + } + host_device = new Device(hContext, -1); + + PlatformState::instance().init(); +} + +Device* getCurrentDevice() { + return g_device; +} + +void setCurrentDevice(unsigned int index) { + assert(index(stream); + if ((s->flags & hipStreamNonBlocking) == 0) { + getNullStream()->finish(); + } + return s->asHostQueue(); + } +} + +amd::HostQueue* getNullStream(amd::Context& ctx) { + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->defaultStream(); + } + } + return nullptr; +} + +amd::HostQueue* getNullStream() { + Device* device = getCurrentDevice(); + return device ? 
device->defaultStream() : nullptr; +} + +}; + +using namespace hip; + +hipError_t hipInit(unsigned int flags) { + HIP_INIT_API(hipInit, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { + HIP_INIT_API(hipCtxCreate, ctx, flags, device); + + if (static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + + *ctx = reinterpret_cast(g_devices[device]); + + // Increment ref count for device primary context + g_devices[device]->retain(); + g_ctxtStack.push(g_devices[device]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxSetCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxSetCurrent, ctx); + + if (ctx == nullptr) { + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + } else { + hip::g_device = reinterpret_cast(ctx); + if(!g_ctxtStack.empty()) { + g_ctxtStack.pop(); + } + g_ctxtStack.push(hip::getCurrentDevice()); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxGetCurrent, ctx); + + *ctx = reinterpret_cast(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipRuntimeGetVersion(int *runtimeVersion) { + HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion); + + if (!runtimeVersion) { + HIP_RETURN(hipErrorInvalidValue); + } + + *runtimeVersion = AMD_PLATFORM_BUILD_NUMBER; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDestroy(hipCtx_t ctx) { + HIP_INIT_API(hipCtxDestroy, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Release last tracked command + hip::getNullStream()->setLastQueuedCommand(nullptr); + + // Need to remove the ctx of calling thread if its the top one + if (!g_ctxtStack.empty() && g_ctxtStack.top() == dev) { + 
g_ctxtStack.pop(); + } + + // Remove context from global context list + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == dev) { + // Decrement ref count for device primary context + dev->release(); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxPopCurrent, ctx); + + hip::Device** dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + if (!g_ctxtStack.empty()) { + *dev = g_ctxtStack.top(); + g_ctxtStack.pop(); + } else { + HIP_RETURN(hipErrorInvalidContext); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxPushCurrent, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + hip::g_device = dev; + g_ctxtStack.push(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDriverGetVersion(int* driverVersion) { + HIP_INIT_API(hipDriverGetVersion, driverVersion); + + auto* deviceHandle = g_devices[0]->devices()[0]; + const auto& info = deviceHandle->info(); + + if (driverVersion) { + *driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 + + AMD_PLATFORM_REVISION_NUMBER; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetDevice(hipDevice_t* device) { + HIP_INIT_API(hipCtxGetDevice, device); + + if (device != nullptr) { + *device = hip::getCurrentDevice()->deviceId(); + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipErrorInvalidContext); +} + +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + HIP_INIT_API(hipCtxGetApiVersion, apiVersion); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { + HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); 
+} + +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { + HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + HIP_INIT_API(hipCtxSetSharedMemConfig, config); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSynchronize(void) { + HIP_INIT_API(hipCtxSynchronize, 1); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetFlags(unsigned int* flags) { + HIP_INIT_API(hipCtxGetFlags, flags); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { + HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (flags != nullptr) { + *flags = 0; + } + + if (active != nullptr) { + *active = (g_devices[dev] == hip::getCurrentDevice())? 
1 : 0; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRelease, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (pctx == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pctx = reinterpret_cast(g_devices[dev]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxReset, dev); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } else { + HIP_RETURN(hipErrorContextAlreadyInUse); + } +} diff --git a/vdi/hip_conversions.hpp b/vdi/hip_conversions.hpp new file mode 100644 index 0000000000..2a78617ad9 --- /dev/null +++ b/vdi/hip_conversions.hpp @@ -0,0 +1,903 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +namespace hip +{ +inline +cl_channel_type getCLChannelType(const hipArray_Format hipFormat, + const hipTextureReadMode hipReadMode) { + if (hipReadMode == hipReadModeElementType) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNSIGNED_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SIGNED_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNSIGNED_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SIGNED_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } else if (hipReadMode == hipReadModeNormalizedFloat) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNORM_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SNORM_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNORM_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SNORM_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_channel_order getCLChannelOrder(const unsigned int hipNumChannels, + const int sRGB) { + switch (hipNumChannels) { + case 1: + return CL_R; + case 2: + return CL_RG; + case 4: + return (sRGB == 
1) ? CL_sRGBA : CL_RGBA; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const unsigned int hipWidth, + const unsigned int hipHeight, + const unsigned int hipDepth, + const unsigned int flags) { + if (flags == hipArrayDefault) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE1D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE2D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE3D; + } + } else if (flags == hipArrayLayered) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE1D_ARRAY; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_addressing_mode getCLAddressingMode(const hipTextureAddressMode hipAddressMode) { + switch (hipAddressMode) { + case hipAddressModeWrap: + return CL_ADDRESS_REPEAT; + case hipAddressModeClamp: + return CL_ADDRESS_CLAMP; + case hipAddressModeMirror: + return CL_ADDRESS_MIRRORED_REPEAT; + case hipAddressModeBorder: + return CL_ADDRESS_CLAMP_TO_EDGE; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_filter_mode getCLFilterMode(const hipTextureFilterMode hipFilterMode) { + switch (hipFilterMode) { + case hipFilterModePoint: + return CL_FILTER_NEAREST; + case hipFilterModeLinear: + return CL_FILTER_LINEAR; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const hipResourceType hipResType) { + switch (hipResType) { + case hipResourceTypeLinear: + return CL_MEM_OBJECT_IMAGE1D_BUFFER; + case hipResourceTypePitch2D: + return CL_MEM_OBJECT_IMAGE2D; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +size_t getElementSize(const hipArray_const_t array) { + switch (array->Format) { + case 
HIP_AD_FORMAT_UNSIGNED_INT8: + case HIP_AD_FORMAT_SIGNED_INT8: + return 1 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT16: + case HIP_AD_FORMAT_SIGNED_INT16: + case HIP_AD_FORMAT_HALF: + return 2 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT32: + case HIP_AD_FORMAT_SIGNED_INT32: + case HIP_AD_FORMAT_FLOAT: + return 4 * array->NumChannels; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipChannelFormatDesc getChannelFormatDesc(int numChannels, + hipArray_Format arrayFormat) { + switch (arrayFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT16: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT16: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindSigned}; 
+ case 4: + return {32, 32, 32, 32, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_HALF: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindFloat}; + } + case HIP_AD_FORMAT_FLOAT: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindFloat}; + } + } + + ShouldNotReachHere(); + + return {}; +} + +inline +unsigned int getNumChannels(const hipChannelFormatDesc& desc) { + return ((desc.x != 0) + (desc.y != 0) + (desc.z != 0) + (desc.w != 0)); +} + +inline +hipArray_Format getArrayFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case 16: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case 32: + return HIP_AD_FORMAT_UNSIGNED_INT32; + } + case hipChannelFormatKindSigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_SIGNED_INT8; + case 16: + return HIP_AD_FORMAT_SIGNED_INT16; + case 32: + return HIP_AD_FORMAT_SIGNED_INT32; + } + case hipChannelFormatKindFloat: + switch (desc.x) { + case 16: + return HIP_AD_FORMAT_HALF; + case 32: + return HIP_AD_FORMAT_FLOAT; + } + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +int getNumChannels(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatSignedChar1: + case hipResViewFormatUnsignedShort1: + case hipResViewFormatSignedShort1: + case hipResViewFormatUnsignedInt1: + case hipResViewFormatSignedInt1: + case hipResViewFormatHalf1: + case hipResViewFormatFloat1: + return 1; + case hipResViewFormatUnsignedChar2: + case hipResViewFormatSignedChar2: + case hipResViewFormatUnsignedShort2: + case 
hipResViewFormatSignedShort2: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatSignedInt2: + case hipResViewFormatHalf2: + case hipResViewFormatFloat2: + return 2; + case hipResViewFormatUnsignedChar4: + case hipResViewFormatSignedChar4: + case hipResViewFormatUnsignedShort4: + case hipResViewFormatSignedShort4: + case hipResViewFormatUnsignedInt4: + case hipResViewFormatSignedInt4: + case hipResViewFormatHalf4: + case hipResViewFormatFloat4: + return 4; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipArray_Format getArrayFormat(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatUnsignedChar2: + case hipResViewFormatUnsignedChar4: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case hipResViewFormatSignedChar1: + case hipResViewFormatSignedChar2: + case hipResViewFormatSignedChar4: + return HIP_AD_FORMAT_SIGNED_INT8; + case hipResViewFormatUnsignedShort1: + case hipResViewFormatUnsignedShort2: + case hipResViewFormatUnsignedShort4: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case hipResViewFormatSignedShort1: + case hipResViewFormatSignedShort2: + case hipResViewFormatSignedShort4: + return HIP_AD_FORMAT_SIGNED_INT16; + case hipResViewFormatUnsignedInt1: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatUnsignedInt4: + return HIP_AD_FORMAT_UNSIGNED_INT32; + case hipResViewFormatSignedInt1: + case hipResViewFormatSignedInt2: + case hipResViewFormatSignedInt4: + return HIP_AD_FORMAT_SIGNED_INT32; + case hipResViewFormatHalf1: + case hipResViewFormatHalf2: + case hipResViewFormatHalf4: + return HIP_AD_FORMAT_HALF; + case hipResViewFormatFloat1: + case hipResViewFormatFloat2: + case hipResViewFormatFloat4: + return HIP_AD_FORMAT_FLOAT; + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipResourceViewFormat getResourceViewFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case hipChannelFormatKindUnsigned: + 
switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar1; + case 16: + return hipResViewFormatUnsignedShort1; + case 32: + return hipResViewFormatUnsignedInt1; + } + case 2: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar2; + case 16: + return hipResViewFormatUnsignedShort2; + case 32: + return hipResViewFormatUnsignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar4; + case 16: + return hipResViewFormatUnsignedShort4; + case 32: + return hipResViewFormatUnsignedInt4; + } + } + case hipChannelFormatKindSigned: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar1; + case 16: + return hipResViewFormatSignedShort1; + case 32: + return hipResViewFormatSignedInt1; + } + case 2: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar2; + case 16: + return hipResViewFormatSignedShort2; + case 32: + return hipResViewFormatSignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar4; + case 16: + return hipResViewFormatSignedShort4; + case 32: + return hipResViewFormatSignedInt4; + } + } + case hipChannelFormatKindFloat: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 16: + return hipResViewFormatHalf1; + case 32: + return hipResViewFormatFloat1; + } + case 2: + switch (desc.x) { + case 16: + return hipResViewFormatHalf2; + case 32: + return hipResViewFormatFloat2; + } + case 4: + switch (desc.x) { + case 16: + return hipResViewFormatHalf4; + case 32: + return hipResViewFormatFloat4; + } + } + default: + break; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +hipTextureDesc getTextureDesc(const textureReference* texRef) { + hipTextureDesc texDesc = {}; + std::memcpy(texDesc.addressMode, texRef->addressMode, sizeof(texDesc.addressMode)); + texDesc.filterMode = texRef->filterMode; + texDesc.readMode = texRef->readMode; + texDesc.sRGB 
= texRef->sRGB; + texDesc.normalizedCoords = texRef->normalized; + texDesc.maxAnisotropy = texRef->maxAnisotropy; + texDesc.mipmapFilterMode = texRef->mipmapFilterMode; + texDesc.mipmapLevelBias = texRef->mipmapLevelBias; + texDesc.minMipmapLevelClamp = texRef->minMipmapLevelClamp; + texDesc.maxMipmapLevelClamp = texRef->maxMipmapLevelClamp; + + return texDesc; +} + +inline +hipResourceViewDesc getResourceViewDesc(hipArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +hipResourceViewDesc getResourceViewDesc(hipMipmappedArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; /* TODO add hipMipmappedArray::numMipLevels */ + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +std::pair getMemoryType(const hipMemcpyKind kind) { + switch (kind) { + case hipMemcpyHostToHost: + return {hipMemoryTypeHost, hipMemoryTypeHost}; + case hipMemcpyHostToDevice: + return {hipMemoryTypeHost, hipMemoryTypeDevice}; + case hipMemcpyDeviceToHost: + return {hipMemoryTypeDevice, hipMemoryTypeHost}; + case hipMemcpyDeviceToDevice: + return {hipMemoryTypeDevice, hipMemoryTypeDevice}; + case hipMemcpyDefault: + return {hipMemoryTypeUnified, hipMemoryTypeUnified}; + } + + ShouldNotReachHere(); + + return {}; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hip_Memcpy2D& 
desc2D) { + HIP_MEMCPY3D desc3D = {}; + + desc3D.srcXInBytes = desc2D.srcXInBytes; + desc3D.srcY = desc2D.srcY; + desc3D.srcZ = 0; + desc3D.srcLOD = 0; + desc3D.srcMemoryType = desc2D.srcMemoryType; + desc3D.srcHost = desc2D.srcHost; + desc3D.srcDevice = desc2D.srcDevice; + desc3D.srcArray = desc2D.srcArray; + desc3D.srcPitch = desc2D.srcPitch; + desc3D.srcHeight = 0; + + desc3D.dstXInBytes = desc2D.dstXInBytes; + desc3D.dstY = desc2D.dstY; + desc3D.dstZ = 0; + desc3D.dstLOD = 0; + desc3D.dstMemoryType = desc2D.dstMemoryType; + desc3D.dstHost = desc2D.dstHost; + desc3D.dstDevice = desc2D.dstDevice; + desc3D.dstArray = desc2D.dstArray; + desc3D.dstPitch = desc2D.dstPitch; + desc3D.dstHeight = 0; + + desc3D.WidthInBytes = desc2D.WidthInBytes; + desc3D.Height = desc2D.Height; + desc3D.Depth = 0; + + return desc3D; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hipMemcpy3DParms& desc) { + HIP_MEMCPY3D descDrv = {}; + + descDrv.WidthInBytes = desc.extent.width; + descDrv.Height = desc.extent.height; + descDrv.Depth = desc.extent.depth; + + descDrv.srcXInBytes = desc.srcPos.x; + descDrv.srcY = desc.srcPos.y; + descDrv.srcZ = desc.srcPos.z; + descDrv.srcLOD = 0; + + descDrv.dstXInBytes = desc.dstPos.x; + descDrv.dstY = desc.dstPos.y; + descDrv.dstZ = desc.dstPos.z; + descDrv.dstLOD = 0; + + if (desc.srcArray != nullptr) { + descDrv.srcMemoryType = hipMemoryTypeArray; + descDrv.srcArray = desc.srcArray; + // When reffering to array memory, hipPos::x is in elements. + descDrv.srcXInBytes *= getElementSize(desc.srcArray); + } + + if (desc.srcPtr.ptr != nullptr) { + descDrv.srcMemoryType = std::get<0>(hip::getMemoryType(desc.kind)); + descDrv.srcHost = desc.srcPtr.ptr; + descDrv.srcDevice = desc.srcPtr.ptr; + descDrv.srcPitch = desc.srcPtr.pitch; + descDrv.srcHeight = desc.srcPtr.ysize; + } + + if (desc.dstArray != nullptr) { + descDrv.dstMemoryType = hipMemoryTypeArray; + descDrv.dstArray = desc.dstArray; + // When reffering to array memory, hipPos::x is in elements. 
+ descDrv.dstXInBytes *= getElementSize(desc.dstArray); + } + + if (desc.dstPtr.ptr != nullptr) { + descDrv.dstMemoryType = std::get<1>(getMemoryType(desc.kind)); + descDrv.dstHost = desc.dstPtr.ptr; + descDrv.dstDevice = desc.dstPtr.ptr; + descDrv.dstPitch = desc.dstPtr.pitch; + descDrv.dstHeight = desc.dstPtr.ysize; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. + if ((desc.srcArray != nullptr) && (desc.dstArray == nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.srcArray); + } else if ((desc.srcArray == nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } else if ((desc.srcArray != nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } + + return descDrv; +} + +inline +hipResourceType getResourceType(const HIPresourcetype resType) { + // These two enums should be isomorphic. + return static_cast(resType); +} + +inline +HIPresourcetype getResourceType(const hipResourceType resType) { + // These two enums should be isomorphic. 
+ return static_cast(resType); +} + +inline +hipResourceDesc getResourceDesc(const HIP_RESOURCE_DESC& resDesc) { + hipResourceDesc desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case hipResourceTypeArray: + desc.res.array.array = resDesc.res.array.hArray; + break; + case hipResourceTypeMipmappedArray: + desc.res.mipmap.mipmap = resDesc.res.mipmap.hMipmappedArray; + break; + case hipResourceTypeLinear: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.desc = getChannelFormatDesc(resDesc.res.linear.numChannels, resDesc.res.linear.format); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.desc = getChannelFormatDesc(resDesc.res.pitch2D.numChannels, resDesc.res.pitch2D.format); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +HIP_RESOURCE_DESC getResourceDesc(const hipResourceDesc& resDesc) { + HIP_RESOURCE_DESC desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case HIP_RESOURCE_TYPE_ARRAY: + desc.res.array.hArray = resDesc.res.array.array; + break; + case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY: + desc.res.mipmap.hMipmappedArray = resDesc.res.mipmap.mipmap; + break; + case HIP_RESOURCE_TYPE_LINEAR: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.numChannels = getNumChannels(resDesc.res.linear.desc); + desc.res.linear.format = getArrayFormat(resDesc.res.linear.desc); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case HIP_RESOURCE_TYPE_PITCH2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.numChannels = getNumChannels(resDesc.res.pitch2D.desc); + desc.res.pitch2D.format = 
getArrayFormat(resDesc.res.pitch2D.desc); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +hipTextureAddressMode getAddressMode(const HIPaddress_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPaddress_mode getAddressMode(const hipTextureAddressMode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +hipTextureFilterMode getFilterMode(const HIPfilter_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPfilter_mode getFilterMode(const hipTextureFilterMode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +hipTextureReadMode getReadMode(const unsigned int flags) { + if (flags & HIP_TRSF_READ_AS_INTEGER) { + return hipReadModeElementType; + } else { + return hipReadModeNormalizedFloat; + } +} + +inline +unsigned int getReadMode(const hipTextureReadMode mode) { + if (mode == hipReadModeElementType) { + return HIP_TRSF_READ_AS_INTEGER; + } else { + return 0; + } +} + +inline +int getsRGB(const unsigned int flags) { + if (flags & HIP_TRSF_SRGB) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getsRGB(const int sRGB) { + if (sRGB == 1) { + return HIP_TRSF_SRGB; + } else { + return 0; + } +} + +inline +int getNormalizedCoords(const unsigned int flags) { + if (flags & HIP_TRSF_NORMALIZED_COORDINATES) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getNormalizedCoords(const int normalizedCoords) { + if (normalizedCoords == 1) { + return HIP_TRSF_NORMALIZED_COORDINATES; + } else { + return 0; + } +} + +inline +hipTextureDesc getTextureDesc(const HIP_TEXTURE_DESC& texDesc) { + hipTextureDesc desc; + + desc.addressMode[0] = getAddressMode(texDesc.addressMode[0]); + 
desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.readMode = getReadMode(texDesc.flags); + desc.sRGB = getsRGB(texDesc.flags); + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + desc.normalizedCoords = getNormalizedCoords(texDesc.flags); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + + return desc; +} + +inline +HIP_TEXTURE_DESC getTextureDesc(const hipTextureDesc& texDesc) { + HIP_TEXTURE_DESC desc; + + desc.addressMode[0] = getAddressMode(texDesc.addressMode[0]); + desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.flags = 0; + desc.flags |= getReadMode(texDesc.readMode); + desc.flags |= getsRGB(texDesc.sRGB); + desc.flags |= getNormalizedCoords(texDesc.normalizedCoords); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + + return desc; +} + +inline +hipResourceViewFormat getResourceViewFormat(const HIPresourceViewFormat format) { + // These two enums should be isomorphic. + return static_cast(format); +} + +inline +HIPresourceViewFormat getResourceViewFormat(const hipResourceViewFormat format) { + // These two enums should be isomorphic. 
+ return static_cast(format); +} + +inline +hipResourceViewDesc getResourceViewDesc(const HIP_RESOURCE_VIEW_DESC& resViewDesc) { + hipResourceViewDesc desc; + + desc.format = getResourceViewFormat(resViewDesc.format); + desc.width = resViewDesc.width; + desc.height = resViewDesc.height; + desc.depth = resViewDesc.depth; + desc.firstMipmapLevel = resViewDesc.firstMipmapLevel; + desc.lastMipmapLevel = resViewDesc.lastMipmapLevel; + desc.firstLayer = resViewDesc.firstLayer; + desc.lastLayer = resViewDesc.lastLayer; + + return desc; +} + +inline +HIP_RESOURCE_VIEW_DESC getResourceViewDesc(const hipResourceViewDesc& resViewDesc) { + HIP_RESOURCE_VIEW_DESC desc; + + desc.format = getResourceViewFormat(resViewDesc.format); + desc.width = resViewDesc.width; + desc.height = resViewDesc.height; + desc.depth = resViewDesc.depth; + desc.firstMipmapLevel = resViewDesc.firstMipmapLevel; + desc.lastMipmapLevel = resViewDesc.lastMipmapLevel; + desc.firstLayer = resViewDesc.firstLayer; + desc.lastLayer = resViewDesc.lastLayer; + + return desc; +} + +inline +size_t getElementSize(const hipChannelFormatDesc &desc) { + return (desc.x / 4) * getNumChannels(desc); +} +}; diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp new file mode 100644 index 0000000000..80e247f37c --- /dev/null +++ b/vdi/hip_device.cpp @@ -0,0 +1,256 @@ +/* Copyright (c) 2018-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +namespace hip { + +amd::HostQueue* Device::defaultStream() { + if (defaultStream_ == nullptr) { + const cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + defaultStream_ = new amd::HostQueue(*asContext(), *devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, + amd::CommandQueue::Priority::Normal); + if ((defaultStream_ == nullptr) || + !defaultStream_->create()) { + return nullptr; + } + } + return defaultStream_; +} + +}; + +hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { + HIP_INIT_API(hipDeviceGet, device, deviceId); + + if (device != nullptr) { + *device = deviceId; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +}; + +hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { + + HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); + + // No way to set cache config yet. 
+ + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { + + HIP_INIT_API(hipDeviceTotalMem, bytes, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (bytes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + + *bytes = info.globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { + + HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (major == nullptr || minor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + *major = info.gfxipVersion_ / 100; + *minor = info.gfxipVersion_ % 100; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCount(int* count) { + HIP_INIT_API(NONE, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t ihipDeviceGetCount(int* count) { + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_devices.size(); + + if (*count < 1) { + return hipErrorNoDevice; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { + + HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (name == nullptr || len <= 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + const auto nameLen = ::strlen(info.boardName_); + + // Make sure that the size of `dest` is big enough to hold `src` including + // trailing zero byte + if 
(nameLen > (cl_uint)(len - 1)) { + HIP_RETURN(hipErrorInvalidValue); + } + + ::strncpy(name, info.boardName_, (nameLen + 1)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) { + HIP_INIT_API(hipGetDeviceProperties, props, device); + + if (props == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (unsigned(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + auto* deviceHandle = g_devices[device]->devices()[0]; + + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableSGPRs_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = INT32_MAX; + deviceProps.maxGridSize[1] = INT32_MAX; + deviceProps.maxGridSize[2] = INT32_MAX; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; + deviceProps.memoryBusWidth = info.globalMemChannels_ * 32; + deviceProps.totalConstMem = info.maxConstantBufferSize_; + deviceProps.major = info.gfxipVersion_ / 100; + deviceProps.minor = info.gfxipVersion_ % 100; + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + 
deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 0; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 0; + deviceProps.arch.hasWarpBallot = 0; + deviceProps.arch.hasWarpShuffle = 0; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.deviceTopology_.pcie.function; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + //deviceProps.isMultiGpuBoard = info.; + deviceProps.canMapHostMemory = 1; + deviceProps.gcnArch = info.gfxipVersion_; + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + + deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_; + + deviceProps.maxTexture1D = info.imageMaxBufferSize_; + deviceProps.maxTexture2D[0] = info.image2DMaxWidth_; + deviceProps.maxTexture2D[1] = info.image2DMaxHeight_; + deviceProps.maxTexture3D[0] = info.image3DMaxWidth_; + deviceProps.maxTexture3D[1] = info.image3DMaxHeight_; + deviceProps.maxTexture3D[2] = info.image3DMaxDepth_; + deviceProps.hdpMemFlushCntl = nullptr; + deviceProps.hdpRegFlushCntl = nullptr; + + deviceProps.memPitch = info.maxMemAllocSize_; + deviceProps.textureAlignment = 
info.imageBaseAddressAlignment_; + deviceProps.texturePitchAlignment = info.imagePitchAlignment_; + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0; + + *props = deviceProps; + HIP_RETURN(hipSuccess); +} + +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { + HIP_INIT_API(NONE, deviceId, acc); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { + HIP_INIT_API(NONE, stream, av); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/vdi/hip_device_runtime.cpp b/vdi/hip_device_runtime.cpp new file mode 100644 index 0000000000..febf64d116 --- /dev/null +++ b/vdi/hip_device_runtime.cpp @@ -0,0 +1,569 @@ +/* Copyright (c) 2018-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(hipChooseDevice, device, properties); + + if (device == nullptr || properties == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *device = 0; + cl_uint maxMatchedCount = 0; + int count = 0; + ihipDeviceGetCount(&count); + + for (cl_int i = 0; i< count; ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = hipGetDeviceProperties(&currentProp, i); // err is not checked: a failed query leaves currentProp zero-initialised + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { +
validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); + + if (pi == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int count = 0; + ihipDeviceGetCount(&count); + if (device < 0 || device >= count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + //FIXME: should we cache the props, or just select from deviceHandle->info_? 
+ hipDeviceProp_t prop = {0}; + hipError_t err = hipGetDeviceProperties(&prop, device); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + *pi = prop.totalConstMem; + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributePciBusId: + *pi = prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case 
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + *reinterpret_cast(pi) = prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; + case hipDeviceAttributeMaxPitch: + *pi = prop.memPitch; + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeTexturePitchAlignment: + *pi = prop.texturePitchAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop.cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop.cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop.cooperativeMultiDeviceUnmatchedBlockDim; + break; + case 
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop.cooperativeMultiDeviceUnmatchedSharedMem; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { + + HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); + + if (device == nullptr || pciBusIdstr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) { + int count = 0; + ihipDeviceGetCount(&count); + for (cl_int i = 0; i < count; i++) { + int pi = 0; + hipDevice_t dev; + hipDeviceGet(&dev, i); + hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev); + + if (pciBusID == pi) { + *device = i; + break; + } + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { + HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); + + if(cacheConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *cacheConfig = hipFuncCache_t(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { + + HIP_INIT_API(hipDeviceGetLimit, pValue, limit); + + if(pValue == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if(limit == hipLimitMallocHeapSize) { + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, ihipGetDevice()); + + *pValue = prop.totalGlobalMem; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorUnsupportedLimit); + } +} + +/** +hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) { + assert(0); + HIP_RETURN(hipSuccess); +} +**/ + +hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { + + HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device); + + int count = 0; + ihipDeviceGetCount(&count); + if (device < 0 || device >= count) { // valid ordinals are [0, count) +
HIP_RETURN(hipErrorInvalidDevice); + } + + if (pciBusId == nullptr || len < 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, device); + + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + prop.pciDomainID, + prop.pciBusID, + prop.pciDeviceID); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { + HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceReset ( void ) { + HIP_INIT_API(hipDeviceReset); + + /* FIXME */ + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { + HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); + + // No way to set cache config yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { + HIP_INIT_API(hipDeviceSetSharedMemConfig, config); + + // No way to set cache config yet. 
+ + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSynchronize ( void ) { + HIP_INIT_API(hipDeviceSynchronize); + + hip::syncStreams(); + + amd::HostQueue* queue = hip::getNullStream(); + + if (!queue) { + HIP_RETURN(hipErrorOutOfMemory); + } + + queue->finish(); + HIP_RETURN(hipSuccess); +} + +int ihipGetDevice() { + return hip::getCurrentDevice()->deviceId(); +} + +hipError_t hipGetDevice ( int* deviceId ) { + HIP_INIT_API(hipGetDevice, deviceId); + + if (deviceId != nullptr) { + int dev = ihipGetDevice(); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } + *deviceId = dev; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } +} + +hipError_t hipGetDeviceCount ( int* count ) { + HIP_INIT_API(hipGetDeviceCount, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t hipGetDeviceFlags ( unsigned int* flags ) { + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { + HIP_INIT_API(NONE, handle, event); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { + HIP_INIT_API(NONE, event, handle); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipSetDevice ( int device ) { + HIP_INIT_API(hipSetDevice, device); + + if (static_cast(device) < g_devices.size()) { + hip::setCurrentDevice(device); + + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidDevice); +} + +hipError_t hipSetDeviceFlags ( unsigned int flags ) { + HIP_INIT_API(hipSetDeviceFlags, flags); + + constexpr uint32_t supportedFlags = + hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; + + if (flags & ~supportedFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + switch (flags & hipDeviceScheduleMask) { + case hipDeviceScheduleAuto: + // Current behavior is different 
from the spec, due to MT usage in runtime + if (hip::host_device->devices().size() >= std::thread::hardware_concurrency()) { + device->SetActiveWait(false); + break; + } + // Fall through for active wait... + case hipDeviceScheduleSpin: + case hipDeviceScheduleYield: + // The both options falls into yield, because MT usage in runtime + device->SetActiveWait(true); + break; + case hipDeviceScheduleBlockingSync: + device->SetActiveWait(false); + break; + default: + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipSetValidDevices ( int* device_arr, int len ) { + HIP_INIT_API(NONE, device_arr, len); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) { + HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); + + amd::Device* amd_dev_obj1 = nullptr; + amd::Device* amd_dev_obj2 = nullptr; + const int numDevices = static_cast(g_devices.size()); + + if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if ((linktype == nullptr) || (hopcount == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd_dev_obj1 = g_devices[device1]->devices()[0]; + amd_dev_obj2 = g_devices[device2]->devices()[0]; + + if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) { + HIP_RETURN(hipErrorInvalidHandle); + } + + HIP_RETURN(hipSuccess); +} + diff --git a/vdi/hip_error.cpp b/vdi/hip_error.cpp new file mode 100644 index 0000000000..5802629154 --- /dev/null +++ b/vdi/hip_error.cpp @@ -0,0 +1,172 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipGetLastError() +{ + HIP_INIT_API(hipGetLastError); + hipError_t err = hip::g_lastError; + hip::g_lastError = hipSuccess; + return err; +} + +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(hipPeekAtLastError); + hipError_t err = hip::g_lastError; + HIP_RETURN(err); +} + +const char *hipGetErrorName(hipError_t hip_error) +{ + switch (hip_error) { + case hipSuccess: + return "hipSuccess"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidSymbol: + return "hipErrorInvalidSymbol"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorInsufficientDriver: + return "hipErrorInsufficientDriver"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return 
"hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case 
hipErrorAssert: + return "hipErrorAssert"; + case hipErrorHostMemoryAlreadyRegistered: + return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorNotSupported: + return "hipErrorNotSupported"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; + }; +} + +const char *hipGetErrorString(hipError_t hip_error) +{ + return hipGetErrorName(hip_error); +} + diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp new file mode 100644 index 0000000000..677becd67e --- /dev/null +++ b/vdi/hip_event.cpp @@ -0,0 +1,254 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_event.hpp" + +namespace hip { + +bool Event::ready() { + if (event_->status() != CL_COMPLETE) { + event_->notifyCmdQueue(); + } + + return (event_->status() == CL_COMPLETE); +} + +hipError_t Event::query() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + return ready() ? hipSuccess : hipErrorNotReady; +} + +hipError_t Event::synchronize() { + amd::ScopedLock lock(lock_); + + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + event_->awaitCompletion(); + + return hipSuccess; +} + +hipError_t Event::elapsedTime(Event& eStop, float& ms) { + amd::ScopedLock startLock(lock_); + + if (this == &eStop) { + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if (flags & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready()) { + return hipErrorNotReady; + } + + ms = 0.f; + return hipSuccess; + } + amd::ScopedLock stopLock(eStop.lock_); + + if (event_ == nullptr || + eStop.event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if ((flags | eStop.flags) & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready() || !eStop.ready()) { + return hipErrorNotReady; + } + + ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - + event_->profilingInfo().start_))/1000000.f; + + return hipSuccess; +} + +hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) { + // Take lock_ before reading event_, as query()/synchronize() do; addMarker() may release and replace it. + amd::ScopedLock lock(lock_); + + if ((event_ == nullptr) || (event_->command().queue() == hostQueue)) { + return hipSuccess; + } + + if (!event_->notifyCmdQueue()) { + return hipErrorLaunchOutOfResources; + } + amd::Command::EventWaitList eventWaitList; + eventWaitList.push_back(event_); + + amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList); + if (command == NULL) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + + return hipSuccess; +} + +void
Event::addMarker(amd::HostQueue* queue, amd::Command* command) { + amd::ScopedLock lock(lock_); + + if (event_ == &command->event()) return; + + if (event_ != nullptr) { + event_->release(); + } + + event_ = &command->event(); +} + +} + +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + if (event == nullptr) { + return hipErrorInvalidValue; + } + + unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | + hipEventReleaseToDevice | hipEventReleaseToSystem; + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); + + const bool illegalFlags = + (flags & ~supportedFlags) || // can't set any unsupported flags. + (flags & releaseFlags) == releaseFlags; // can't set both release flags + + if (!illegalFlags) { + hip::Event* e = new hip::Event(flags); + + if (e == nullptr) { + return hipErrorOutOfMemory; + } + + *event = reinterpret_cast<hipEvent_t>(e); // the opaque handle is the hip::Event pointer itself + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipEventQuery(hipEvent_t event) { + if (event == nullptr) { + return hipErrorInvalidHandle; + } + + hip::Event* e = reinterpret_cast<hip::Event*>(event); + + return e->query(); +} + +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + HIP_INIT_API(hipEventCreateWithFlags, event, flags); + + HIP_RETURN(ihipEventCreateWithFlags(event, flags)); +} + +hipError_t hipEventCreate(hipEvent_t* event) { + HIP_INIT_API(hipEventCreate, event); + + HIP_RETURN(ihipEventCreateWithFlags(event, 0)); +} + +hipError_t hipEventDestroy(hipEvent_t event) { + HIP_INIT_API(hipEventDestroy, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + delete reinterpret_cast<hip::Event*>(event); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { + HIP_INIT_API(hipEventElapsedTime, ms, start, stop); + + if (start == nullptr || stop == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + if (ms == nullptr) { +
HIP_RETURN(hipErrorInvalidValue); + } + + hip::Event* eStart = reinterpret_cast<hip::Event*>(start); + hip::Event* eStop = reinterpret_cast<hip::Event*>(stop); + + HIP_RETURN(eStart->elapsedTime(*eStop, *ms)); +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { + HIP_INIT_API(hipEventRecord, event, stream); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast<hip::Event*>(event); + + hip::Stream* s = reinterpret_cast<hip::Stream*>(stream); + amd::HostQueue* queue = hip::getQueue(stream); + + amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ? + queue->getLastQueuedCommand(true) : nullptr; + + if (command == nullptr) { + command = new amd::Marker(*queue, false); + command->enqueue(); + } + + e->addMarker(queue, command); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventSynchronize(hipEvent_t event) { + HIP_INIT_API(hipEventSynchronize, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast<hip::Event*>(event); + + HIP_RETURN(e->synchronize()); +} + +hipError_t hipEventQuery(hipEvent_t event) { + HIP_INIT_API(hipEventQuery, event); + + HIP_RETURN(ihipEventQuery(event)); +} diff --git a/vdi/hip_event.hpp b/vdi/hip_event.hpp new file mode 100644 index 0000000000..2360c972bb --- /dev/null +++ b/vdi/hip_event.hpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HIP_EVENT_H +#define HIP_EVENT_H + +#include "hip_internal.hpp" +#include "thread/monitor.hpp" + +namespace hip { + +class TimerMarker: public amd::Marker { +public: + TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) { + profilingInfo_.enabled_ = true; + profilingInfo_.callback_ = nullptr; + profilingInfo_.start_ = profilingInfo_.end_ = 0; + } +}; + +class Event { +public: + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(nullptr), event_(nullptr) { + // event_ and stream_ start out null; addMarker() later attaches the recorded command's event + } + + ~Event() { + if (event_ != nullptr) { + event_->release(); + } + } + unsigned int flags; + + hipError_t query(); + hipError_t synchronize(); + hipError_t elapsedTime(Event& stop, float& ms); + hipError_t streamWait(amd::HostQueue* queue, uint flags); + + void addMarker(amd::HostQueue* queue, amd::Command* command); + +private: + amd::Monitor lock_; + amd::HostQueue* stream_; + amd::Event* event_; + + bool ready(); +}; + +} // namespace hip + +#endif // HIP_EVENT_H diff --git a/vdi/hip_formatting.hpp b/vdi/hip_formatting.hpp new file mode 100644 index 0000000000..8c26249e03 --- /dev/null +++ b/vdi/hip_formatting.hpp @@ -0,0 +1,843 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ +#include +#include + +inline std::ostream& operator<<(std::ostream& os, const hipTextureFilterMode& s) { + switch (s) { + case hipFilterModePoint: + os << "hipFilterModePoint"; + break; + case hipFilterModeLinear: + os << "hipFilterModeLinear"; + break; + default: + os << "hipFilterModePoint"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureReadMode& s) { + switch (s) { + case hipReadModeElementType: + os << "hipReadModeElementType"; + break; + case hipReadModeNormalizedFloat: + os << "hipReadModeNormalizedFloat"; + break; + default: + os << "hipReadModeElementType"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureAddressMode& s) { + switch (s) { + case hipAddressModeWrap: + os << "hipAddressModeWrap"; + break; + case hipAddressModeClamp: + os << "hipAddressModeClamp"; + break; + case hipAddressModeMirror: + os << "hipAddressModeMirror"; + break; + case hipAddressModeBorder: + os << "hipAddressModeBorder"; + break; + default: + os << "hipAddressModeWrap"; + }; + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const hipMemcpyKind& s) { + switch (s) { + case hipMemcpyHostToHost: + os << "hipMemcpyHostToHost"; + break; + case hipMemcpyHostToDevice: + os << "hipMemcpyHostToDevice"; + break; + case hipMemcpyDeviceToHost: + os << "hipMemcpyDeviceToHost"; + break; + case hipMemcpyDeviceToDevice: + os << "hipMemcpyDeviceToDevice"; + break; + case hipMemcpyDefault: + os << "hipMemcpyDefault"; + break; + default: + os << "hipMemcpyDefault"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatKind& s) { + switch (s) { + case hipChannelFormatKindSigned: + os << "hipChannelFormatKindSigned"; + break; + case hipChannelFormatKindUnsigned: + os << "hipChannelFormatKindUnsigned"; + break; + case hipChannelFormatKindFloat: + os << "hipChannelFormatKindFloat"; + break; + case hipChannelFormatKindNone: + os << "hipChannelFormatKindNone"; +
break; + default: + os << "hipChannelFormatKindNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipArray_Format& s) { + switch (s) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + os << "HIP_AD_FORMAT_UNSIGNED_INT8"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + os << "HIP_AD_FORMAT_UNSIGNED_INT16"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + os << "HIP_AD_FORMAT_UNSIGNED_INT32"; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + os << "HIP_AD_FORMAT_SIGNED_INT8"; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + os << "HIP_AD_FORMAT_SIGNED_INT16"; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + os << "HIP_AD_FORMAT_SIGNED_INT32"; + break; + case HIP_AD_FORMAT_HALF: + os << "HIP_AD_FORMAT_HALF"; + break; + case HIP_AD_FORMAT_FLOAT: + os << "HIP_AD_FORMAT_FLOAT"; + break; + default: + os << "HIP_AD_FORMAT_FLOAT"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipResourceViewFormat& s) { + switch (s) { + case hipResViewFormatNone: + os << "hipResViewFormatNone"; + break; + case hipResViewFormatUnsignedChar1: + os << "hipResViewFormatUnsignedChar1"; + break; + case hipResViewFormatUnsignedChar2: + os << "hipResViewFormatUnsignedChar2"; + break; + case hipResViewFormatUnsignedChar4: + os << "hipResViewFormatUnsignedChar4"; + break; + case hipResViewFormatSignedChar1: + os << "hipResViewFormatSignedChar1"; + break; + case hipResViewFormatSignedChar2: + os << "hipResViewFormatSignedChar2"; + break; + case hipResViewFormatSignedChar4: + os << "hipResViewFormatSignedChar4"; + break; + case hipResViewFormatUnsignedShort1: + os << "hipResViewFormatUnsignedShort1"; + break; + case hipResViewFormatUnsignedShort2: + os << "hipResViewFormatUnsignedShort2"; + break; + case hipResViewFormatUnsignedShort4: + os << "hipResViewFormatUnsignedShort4"; + break; + case hipResViewFormatSignedShort1: + os << "hipResViewFormatSignedShort1"; + break; + case hipResViewFormatSignedShort2: + os << "hipResViewFormatSignedShort2"; + break; + 
case hipResViewFormatSignedShort4: + os << "hipResViewFormatSignedShort4"; + break; + case hipResViewFormatUnsignedInt1: + os << "hipResViewFormatUnsignedInt1"; + break; + case hipResViewFormatUnsignedInt2: + os << "hipResViewFormatUnsignedInt2"; + break; + case hipResViewFormatUnsignedInt4: + os << "hipResViewFormatUnsignedInt4"; + break; + case hipResViewFormatSignedInt1: + os << "hipResViewFormatSignedInt1"; + break; + case hipResViewFormatSignedInt2: + os << "hipResViewFormatSignedInt2"; + break; + case hipResViewFormatSignedInt4: + os << "hipResViewFormatSignedInt4"; + break; + case hipResViewFormatHalf1: + os << "hipResViewFormatHalf1"; + break; + case hipResViewFormatHalf2: + os << "hipResViewFormatHalf2"; + break; + case hipResViewFormatHalf4: + os << "hipResViewFormatHalf4"; + break; + case hipResViewFormatFloat1: + os << "hipResViewFormatFloat1"; + break; + case hipResViewFormatFloat2: + os << "hipResViewFormatFloat2"; + break; + case hipResViewFormatFloat4: + os << "hipResViewFormatFloat4"; + break; + case hipResViewFormatUnsignedBlockCompressed1: + os << "hipResViewFormatUnsignedBlockCompressed1"; + break; + case hipResViewFormatUnsignedBlockCompressed2: + os << "hipResViewFormatUnsignedBlockCompressed2"; + break; + case hipResViewFormatUnsignedBlockCompressed3: + os << "hipResViewFormatUnsignedBlockCompressed3"; + break; + case hipResViewFormatUnsignedBlockCompressed4: + os << "hipResViewFormatUnsignedBlockCompressed4"; + break; + case hipResViewFormatSignedBlockCompressed4: + os << "hipResViewFormatSignedBlockCompressed4"; + break; + case hipResViewFormatUnsignedBlockCompressed5: + os << "hipResViewFormatUnsignedBlockCompressed5"; + break; + case hipResViewFormatSignedBlockCompressed5: + os << "hipResViewFormatSignedBlockCompressed5"; + break; + case hipResViewFormatUnsignedBlockCompressed6H: + os << "hipResViewFormatUnsignedBlockCompressed6H"; + break; + case hipResViewFormatSignedBlockCompressed6H: + os << "hipResViewFormatSignedBlockCompressed6H"; 
+ break; + case hipResViewFormatUnsignedBlockCompressed7: + os << "hipResViewFormatUnsignedBlockCompressed7"; + break; + default: + os << "hipResViewFormatNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipFunction_attribute& s) { + switch (s) { + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + os << "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK"; + break; + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + os << "HIP_FUNC_ATTRIBUTE_NUM_REGS"; + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + os << "HIP_FUNC_ATTRIBUTE_PTX_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + os << "HIP_FUNC_ATTRIBUTE_BINARY_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + os << "HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA"; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + os << "HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT"; + break; + case HIP_FUNC_ATTRIBUTE_MAX: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + break; + default: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hiprtcResult& s) { + switch (s) { + case HIPRTC_SUCCESS: + os << "HIPRTC_SUCCESS"; + break; + case HIPRTC_ERROR_OUT_OF_MEMORY: + os << "HIPRTC_ERROR_OUT_OF_MEMORY"; + break; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + os << "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + break; + case HIPRTC_ERROR_INVALID_INPUT: + os << "HIPRTC_ERROR_INVALID_INPUT"; + break; + case HIPRTC_ERROR_INVALID_PROGRAM: + os << "HIPRTC_ERROR_INVALID_PROGRAM"; + break; + case HIPRTC_ERROR_INVALID_OPTION: 
+ os << "HIPRTC_ERROR_INVALID_OPTION"; + break; + case HIPRTC_ERROR_COMPILATION: + os << "HIPRTC_ERROR_COMPILATION"; + break; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + os << "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + break; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + os << "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + break; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + os << "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + break; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + os << "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + break; + case HIPRTC_ERROR_INTERNAL_ERROR: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + break; + default: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipJitOption& s) { + switch (s) { + case hipJitOptionMaxRegisters: + os << "hipJitOptionMaxRegisters"; + break; + case hipJitOptionThreadsPerBlock: + os << "hipJitOptionThreadsPerBlock"; + break; + case hipJitOptionWallTime: + os << "hipJitOptionWallTime"; + break; + case hipJitOptionInfoLogBuffer: + os << "hipJitOptionInfoLogBuffer"; + break; + case hipJitOptionInfoLogBufferSizeBytes: + os << "hipJitOptionInfoLogBufferSizeBytes"; + break; + case hipJitOptionErrorLogBuffer: + os << "hipJitOptionErrorLogBuffer"; + break; + case hipJitOptionErrorLogBufferSizeBytes: + os << "hipJitOptionErrorLogBufferSizeBytes"; + break; + case hipJitOptionOptimizationLevel: + os << "hipJitOptionOptimizationLevel"; + break; + case hipJitOptionTargetFromContext: + os << "hipJitOptionTargetFromContext"; + break; + case hipJitOptionTarget: + os << "hipJitOptionTarget"; + break; + case hipJitOptionFallbackStrategy: + os << "hipJitOptionFallbackStrategy"; + break; + case hipJitOptionGenerateDebugInfo: + os << "hipJitOptionGenerateDebugInfo"; + break; + case hipJitOptionCacheMode: + os << "hipJitOptionCacheMode"; + break; + case hipJitOptionSm3xOpt: + os << "hipJitOptionSm3xOpt"; + break; + case
hipJitOptionFastCompile: + os << "hipJitOptionFastCompile"; + break; + case hipJitOptionNumOptions: + os << "hipJitOptionNumOptions"; + break; + default: + os << "hipJitOptionMaxRegisters"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipFuncCache_t& s) { + switch (s) { + case hipFuncCachePreferNone: + os << "hipFuncCachePreferNone"; + break; + case hipFuncCachePreferShared: + os << "hipFuncCachePreferShared"; + break; + case hipFuncCachePreferL1: + os << "hipFuncCachePreferL1"; + break; + case hipFuncCachePreferEqual: + os << "hipFuncCachePreferEqual"; + break; + default: + os << "hipFuncCachePreferNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipSharedMemConfig& s) { + switch (s) { + case hipSharedMemBankSizeDefault: + os << "hipSharedMemBankSizeDefault"; + break; + case hipSharedMemBankSizeFourByte: + os << "hipSharedMemBankSizeFourByte"; + break; + case hipSharedMemBankSizeEightByte: + os << "hipSharedMemBankSizeEightByte"; + break; + default: + os << "hipSharedMemBankSizeDefault"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipDataType& s) { + switch (s) { + case HIP_R_16F: + os << "HIP_R_16F"; + break; + case HIP_R_32F: + os << "HIP_R_32F"; + break; + case HIP_R_64F: + os << "HIP_R_64F"; + break; + case HIP_C_16F: + os << "HIP_C_16F"; + break; + case HIP_C_32F: + os << "HIP_C_32F"; + break; + case HIP_C_64F: + os << "HIP_C_64F"; + break; + default: + os << "HIP_R_16F"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipLibraryPropertyType& s) { + switch (s) { + case HIP_LIBRARY_MAJOR_VERSION: + os << "HIP_LIBRARY_MAJOR_VERSION"; + break; + case HIP_LIBRARY_MINOR_VERSION: + os << "HIP_LIBRARY_MINOR_VERSION"; + break; + case HIP_LIBRARY_PATCH_LEVEL: + os << "HIP_LIBRARY_PATCH_LEVEL"; + break; + default: + os << "HIP_LIBRARY_MAJOR_VERSION"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const 
hip_api_id_t& s) { + os << hip_api_name(s); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hip_api_id_t* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc& s) { + os << '{' + << '{' + << s.addressMode[0] + << ',' + << s.addressMode[1] + << ',' + << s.addressMode[2] + << '}' + << ',' + << s.filterMode + << ',' + << s.readMode + << ',' + << s.sRGB + << ',' + << '{' + << s.borderColor[0] + << ',' + << s.borderColor[1] + << ',' + << s.borderColor[2] + << ',' + << s.borderColor[3] + << '}' + << ',' + << s.normalizedCoords + << ',' + << s.mipmapFilterMode + << ',' + << s.mipmapLevelBias + << ',' + << s.minMipmapLevelClamp + << ',' + << s.maxMipmapLevelClamp + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const dim3& s) { + os << '{' + << s.x + << ',' + << s.y + << ',' + << s.z + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const dim3* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatDesc& s) { + os << '{' + << s.x + << ',' + << s.y + << ',' + << s.z + << ',' + << s.w + << ',' + << s.f + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray& s) { + os << '{' + << s.data + << ',' + << s.desc + << ',' + << s.width + << ',' + << s.height + << ',' + << s.depth + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + 
return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const hipResourceDesc& s) { + os << '{' + << s.resType + << ',' + << '{'; + + switch (s.resType) { + case hipResourceTypeLinear: + os << s.res.linear.devPtr + << ',' + << s.res.linear.desc + << ',' + << s.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + os << s.res.pitch2D.devPtr + << ',' + << s.res.pitch2D.desc + << ',' + << s.res.pitch2D.width + << ',' + << s.res.pitch2D.height + << ',' + << s.res.pitch2D.pitchInBytes; + break; + case hipResourceTypeArray: + os << s.res.array.array; + break; + case hipResourceTypeMipmappedArray: + os <(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveApiCallback(uint32_t id) { + return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { + return callbacks_table.set_activity(id, reinterpret_cast(fun), arg) ? + hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipRemoveActivityCallback(uint32_t id) { + return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue; +} + +hipError_t hipEnableTracing(bool enabled) { + callbacks_table.set_enabled(enabled); + return hipSuccess; +} + +const char* hipApiName(uint32_t id) { + return hip_api_name(id); +} diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp new file mode 100755 index 0000000000..9b4bd17042 --- /dev/null +++ b/vdi/hip_internal.hpp @@ -0,0 +1,297 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HIP_SRC_HIP_INTERNAL_H +#define HIP_SRC_HIP_INTERNAL_H + +#include "vdi_common.hpp" +#include "hip_prof_api.h" +#include "trace_helper.h" +#include "utils/debug.hpp" +#include "hip_formatting.hpp" +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +/*! 
IHIP IPC MEMORY Structure */ +#define IHIP_IPC_MEM_HANDLE_SIZE 32 +#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24) + +typedef struct ihipIpcMemHandle_st { + char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr + size_t psize; + char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; +} ihipIpcMemHandle_t; + +#ifdef _WIN32 + inline int getpid() { return _getpid(); } +#endif + +#define HIP_INIT() \ + std::call_once(hip::g_ihipInitialized, hip::init); \ + if (hip::g_device == nullptr && g_devices.size() > 0) { \ + hip::g_device = g_devices[0]; \ + } + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(cid, ...) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] %s ( %s )", getpid(), std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIP_RETURN(hipErrorOutOfMemory); \ + } \ + HIP_INIT() \ + HIP_CB_SPAWNER_OBJECT(cid); + +#define HIP_RETURN(ret) \ + hip::g_lastError = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] %s: Returned %s", getpid(), std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \ + return hip::g_lastError; + +namespace hc { +class accelerator; +class accelerator_view; +}; + +namespace hip { + + /// HIP Device class + class Device { + amd::Monitor lock_{"Device lock"}; + /// VDI context + amd::Context* context_; + /// VDI host queue for default streams + amd::HostQueue* defaultStream_ = nullptr; + /// Device's ID + /// Store it here so we don't have to loop through the device list every time + int deviceId_; + //Maintain list of user enabled peers + std::list userEnabledPeers; + public: + Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId) { assert(ctx != nullptr); } + ~Device() {} + + amd::Context* asContext() const { return context_; } + int deviceId() const { return deviceId_; } + void retain() const { context_->retain(); } + void 
release() const { context_->release(); } + const std::vector& devices() const { return context_->devices(); } + hipError_t EnablePeerAccess(int peerDeviceId){ + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + return hipErrorPeerAccessAlreadyEnabled; + } + userEnabledPeers.push_back(peerDeviceId); + return hipSuccess; + } + hipError_t DisablePeerAccess(int peerDeviceId) { + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + userEnabledPeers.remove(peerDeviceId); + return hipSuccess; + } else { + return hipErrorPeerAccessNotEnabled; + } + } + amd::HostQueue* defaultStream(); + }; + + extern std::once_flag g_ihipInitialized; + /// Current thread's device + extern thread_local Device* g_device; + extern thread_local hipError_t g_lastError; + /// Device representing the host - for pinned memory + extern Device* host_device; + + extern void init(); + + extern Device* getCurrentDevice(); + extern void setCurrentDevice(unsigned int index); + + /// Get VDI queue associated with hipStream + /// Note: This follows the CUDA spec to sync with default streams + /// and Blocking streams + extern amd::HostQueue* getQueue(hipStream_t s); + /// Get default stream associated with the VDI context + extern amd::HostQueue* getNullStream(amd::Context&); + /// Get default stream of the thread + extern amd::HostQueue* getNullStream(); + /// Sync Blocking streams on the current device + extern void syncStreams(); + /// Sync blocking streams on the given device + extern void syncStreams(int devId); + + + struct Function { + amd::Kernel* function_; + amd::Monitor lock_; + + Function(amd::Kernel* f) : function_(f), lock_("function lock") {} + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + + static Function* asFunction(hipFunction_t f) { return 
reinterpret_cast(f); } + }; + + struct Stream { + amd::HostQueue* queue; + amd::Monitor lock; + Device* device; + amd::CommandQueue::Priority priority; + unsigned int flags; + + Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f); + void create(); + amd::HostQueue* asHostQueue(); + void destroy(); + void finish(); + }; + +}; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class PlatformState { + amd::Monitor lock_{"Guards global function map"}; + + std::unordered_map>> modules_; + bool initialized_{false}; + + void digestFatBinary(const void* data, std::vector>& programs); +public: + void init(); + std::vector>* addFatBinary(const void*data) + { + if (initialized_) { + digestFatBinary(data, modules_[data]); + } + return &modules_[data]; + } + void removeFatBinary(std::vector>* module) + { + for (auto& mod : modules_) { + if (&mod.second == module) { + modules_.erase(mod.first); + return; + } + } + } + + struct RegisteredVar { + public: + RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} + ~RegisteredVar() {} + + hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
+ amd::Memory* amd_mem_obj_; + }; + + struct DeviceFunction { + std::string deviceName; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector functions; + }; + struct DeviceVar { + void* shadowVptr; + std::string hostVar; + size_t size; + std::vector< std::pair< hipModule_t, bool > >* modules; + std::vector rvars; + bool dyn_undef; + }; +private: + class Module { + public: + Module(hipModule_t hip_module_) : hip_module(hip_module_) {} + std::unordered_map functions_; + private: + hipModule_t hip_module; + }; + std::unordered_map module_map_; + + std::unordered_map functions_; + std::unordered_multimap vars_; + // Map from the host shadow symbol to its device name. + std::unordered_map symbols_; + + static PlatformState* platform_; + + PlatformState() {} + ~PlatformState() {} +public: + static PlatformState& instance() { + return *platform_; + } + + bool unregisterFunc(hipModule_t hmod); + std::vector< std::pair >* unregisterVar(hipModule_t hmod); + + + bool findSymbol(const void *hostVar, std::string &devName); + PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); + void registerVarSym(const void *hostVar, const char *symbolName); + void registerVar(const char* symbolName, const DeviceVar& var); + void registerFunction(const void* hostFunction, const DeviceFunction& func); + + bool registerModFuncs(std::vector& func_names, hipModule_t* module); + bool findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); + bool createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); + hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); + bool getGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** 
var_addr, size_t* var_size); + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + + void popExec(ihipExec_t& exec); + +}; + +extern std::vector g_devices; +extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); +extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); +extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); +extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size); + +#endif // HIP_SRC_HIP_INTERNAL_H diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp new file mode 100644 index 0000000000..eb56b69bd2 --- /dev/null +++ b/vdi/hip_memory.cpp @@ -0,0 +1,2188 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_conversions.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" + +amd::Memory* getMemoryObject(const void* ptr, size_t& offset) { + amd::Memory *memObj = amd::MemObjMap::FindMemObj(ptr); + if (memObj != nullptr) { + if (memObj->getSvmPtr() != nullptr) { + // SVM pointer + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr()); + } else if (memObj->getHostMem() != nullptr) { + // Prepinned memory + offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getHostMem()); + } else { + ShouldNotReachHere(); + } + } + return memObj; +} + +hipError_t ihipFree(void *ptr) +{ + if (ptr == nullptr) { + return hipSuccess; + } + if (amd::SvmBuffer::malloced(ptr)) { + for (auto& dev : g_devices) { + amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); + if (queue != nullptr) { + queue->finish(); + } + hip::syncStreams(dev->deviceId()); + } + amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); + return hipSuccess; + } + return hipErrorInvalidValue; +} + +hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) +{ + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + else if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + + amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)?
+ hip::host_device->asContext() : hip::getCurrentDevice()->asContext(); + + if (amdContext == nullptr) { + return hipErrorOutOfMemory; + } + + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + ClPrint(amd::LOG_INFO, amd::LOG_API, "%-5d: [%zx] ihipMalloc ptr=0x%zx", getpid(),std::this_thread::get_id(), *ptr); + return hipSuccess; +} + +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs writing. + return hipSuccess; + } + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + + size_t sOffset = 0; + amd::Memory *srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dst, dOffset); + amd::Device* queueDevice = &queue.device(); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (kind == hipMemcpyHostToHost)) { + queue.finish(); + memcpy(dst, src, sizeBytes); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != dstMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(dstMemory->getContext()); + waitList.push_back(queue.getLastQueuedCommand(true)); + } + command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dOffset, sizeBytes, src); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + amd::HostQueue* pQueue = &queue; + if (queueDevice != srcMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(srcMemory->getContext()); + waitList.push_back(queue.getLastQueuedCommand(true)); + } + command = new amd::ReadMemoryCommand(*pQueue, 
CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), sOffset, sizeBytes, dst); + isAsync = false; + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + if (queueDevice != srcMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + if (queueDevice != dstMemory->getContext().devices()[0]) { + amd::Coord3D srcOffset(sOffset, 0, 0); + amd::Coord3D dstOffset(dOffset, 0, 0); + amd::Coord3D copySize(sizeBytes, 1, 1); + command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, copySize); + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + return hipSuccess; + } + command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + if (waitList.size() > 0) { + waitList[0]->release(); + } + + return hipSuccess; +} + +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); + + if (flags != hipDeviceMallocDefault && + flags != hipDeviceMallocFinegrained) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, (flags & hipDeviceMallocFinegrained)? 
CL_MEM_SVM_ATOMICS: 0)); +} + +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { + HIP_INIT_API(hipMalloc, ptr, sizeBytes); + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, 0)); +} + +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostMalloc, ptr, sizeBytes, flags); + + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *ptr = nullptr; + + const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + // can't have both Coherent and NonCoherent flags set at the same time + if ((flags & coherentFlags) == coherentFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER | (flags << 16); + if (flags == 0 || + flags & (hipHostMallocCoherent | hipHostMallocMapped) || + (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + ihipFlags |= CL_MEM_SVM_ATOMICS; + } + + HIP_RETURN(ihipMalloc(ptr, sizeBytes, ihipFlags)); +} + +hipError_t hipMallocManaged(void** devPtr, size_t size, + unsigned int flags) { + HIP_INIT_API(hipMallocManaged, devPtr, size, flags); + + if (flags != hipMemAttachGlobal) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(devPtr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); +} + +hipError_t hipFree(void* ptr) { + HIP_INIT_API(hipFree, ptr); + + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); + + hip::syncStreams(); + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); +} + +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyWithStream, dst, src, sizeBytes, kind, stream); + + amd::HostQueue* queue = hip::getQueue(stream); + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, false)); +} + +hipError_t hipMemPtrGetInfo(void *ptr, 
size_t *size) {
+  HIP_INIT_API(hipMemPtrGetInfo, ptr, size);
+
+  size_t offset = 0;
+  amd::Memory* svmMem = getMemoryObject(ptr, offset);
+
+  if (svmMem == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *size = svmMem->getSize();
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Frees host memory by forwarding ptr to ihipFree().
+hipError_t hipHostFree(void* ptr) {
+  HIP_INIT_API(hipHostFree, ptr);
+
+  HIP_RETURN(ihipFree(ptr));
+}
+
+// Releases the image object stored in array->data and deletes the hipArray.
+// Before releasing, the null stream of every device in g_devices is finished
+// and that device's streams are synchronized.
+// Returns hipErrorInvalidValue for a null array or an invalid image handle.
+hipError_t ihipArrayDestroy(hipArray* array) {
+  if (array == nullptr) {
+    return hipErrorInvalidValue;
+  }
+
+  cl_mem memObj = reinterpret_cast<cl_mem>(array->data);
+  if (is_valid(memObj) == false) {
+    return hipErrorInvalidValue;
+  }
+  for (auto& dev : g_devices) {
+    amd::HostQueue* queue = hip::getNullStream(*dev->asContext());
+    if (queue != nullptr) {
+      queue->finish();
+    }
+    hip::syncStreams(dev->deviceId());
+  }
+  as_amd(memObj)->release();
+
+  delete array;
+
+  return hipSuccess;
+}
+
+// Public entry point for ihipArrayDestroy().
+hipError_t hipFreeArray(hipArray* array) {
+  HIP_INIT_API(hipFreeArray, array);
+
+  HIP_RETURN(ihipArrayDestroy(array));
+}
+
+// Reports the SVM pointer and size of the memory object that getMemoryObject()
+// finds for dptr. Returns hipErrorInvalidDevicePointer when none is found.
+hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) {
+  HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr);
+
+  // Since we are using SVM buffer DevicePtr and HostPtr is the same
+  void* ptr = dptr;
+  size_t offset = 0;
+  amd::Memory* svmMem = getMemoryObject(ptr, offset);
+
+  if (svmMem == nullptr) {
+    HIP_RETURN(hipErrorInvalidDevicePointer);
+  }
+
+  *pbase = svmMem->getSvmPtr();
+  *psize = svmMem->getSize();
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Reports free and total global memory of the current device's first device.
+// *free is freeMemory[0] scaled by Ki; *total is info().globalMemSize_.
+hipError_t hipMemGetInfo(size_t* free, size_t* total) {
+  HIP_INIT_API(hipMemGetInfo, free, total);
+
+  size_t freeMemory[2];
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  if(device == nullptr) {
+    HIP_RETURN(hipErrorInvalidDevice);
+  }
+
+  if(!device->globalFreeMemory(freeMemory)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *free = freeMemory[0] * Ki;
+  *total = device->info().globalMemSize_;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Allocates a pitched SVM buffer of height * depth rows.
+//
+// *pitch is width * element size (taken from image_format) rounded up to the
+// device's imagePitchAlignment_; the allocation is *pitch * height * depth
+// bytes. imageType is accepted but not used here.
+//
+// Returns hipErrorInvalidValue when ptr, pitch or image_format is null,
+// hipSuccess with *ptr == nullptr when any extent is zero (*pitch is left
+// untouched in that case), and hipErrorOutOfMemory when the size exceeds
+// maxMemAllocSize_ or the allocation fails.
+hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth,
+                           cl_mem_object_type imageType, const cl_image_format* image_format) {
+
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+
+  // pitch and image_format are dereferenced below, so reject null up front.
+  if ((ptr == nullptr) || (pitch == nullptr) || (image_format == nullptr)) {
+    return hipErrorInvalidValue;
+  }
+
+  // A zero depth is an empty allocation just like a zero width or height;
+  // without this check it reaches the allocator as a zero-byte request.
+  if ((width == 0) || (height == 0) || (depth == 0)) {
+    *ptr = nullptr;
+    return hipSuccess;
+  }
+
+  const amd::Image::Format imageFormat(*image_format);
+
+  *pitch = amd::alignUp(width * imageFormat.getElementSize(), device->info().imagePitchAlignment_);
+
+  size_t sizeBytes = *pitch * height * depth;
+
+  if (device->info().maxMemAllocSize_ < sizeBytes) {
+    return hipErrorOutOfMemory;
+  }
+
+  *ptr = amd::SvmBuffer::malloc(*hip::getCurrentDevice()->asContext(), 0, sizeBytes,
+                                device->info().memBaseAddrAlign_);
+
+  if (*ptr == nullptr) {
+    return hipErrorOutOfMemory;
+  }
+
+  return hipSuccess;
+}
+
+
+// 2D pitched allocation: ihipMallocPitch() with depth 1 and a one-byte
+// (CL_R / CL_UNSIGNED_INT8) element format.
+hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
+  HIP_INIT_API(hipMallocPitch, ptr, pitch, width, height);
+
+  const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 };
+  HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1, CL_MEM_OBJECT_IMAGE2D, &image_format));
+}
+
+// 3D pitched allocation. On success fills pitchedDevPtr->pitch, xsize and
+// ysize from the computed pitch and the requested extent.
+hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
+  HIP_INIT_API(hipMalloc3D, pitchedDevPtr, extent);
+
+  size_t pitch = 0;
+
+  if (pitchedDevPtr == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const cl_image_format image_format = { CL_R, CL_UNSIGNED_INT8 };
+  hipError_t status = hipSuccess;
+  status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth,
+                           CL_MEM_OBJECT_IMAGE3D, &image_format);
+
+  if (status == hipSuccess) {
+    pitchedDevPtr->pitch = pitch;
+    pitchedDevPtr->xsize = extent.width;
+    pitchedDevPtr->ysize = extent.height;
+  }
+
+  HIP_RETURN(status);
+}
+
+amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
+                            const cl_channel_type channelType,
+                            const cl_mem_object_type imageType,
+                            const size_t imageWidth,
+                            const size_t imageHeight,
+                            const 
size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer) { + const amd::Image::Format imageFormat({channelOrder, channelType}); + if (!imageFormat.isValid()) { + return nullptr; + } + + amd::Context& context = *hip::getCurrentDevice()->asContext(); + if (!imageFormat.isSupported(context, imageType)) { + return nullptr; + } + + const std::vector& devices = context.devices(); + if (!devices[0]->info().imageSupport_) { + return nullptr; + } + + if (!amd::Image::validateDimensions(devices, + imageType, + imageWidth, + imageHeight, + imageDepth, + imageArraySize)) { + return nullptr; + } + + // TODO validate the image descriptor. + + amd::Image* image = nullptr; + if (buffer != nullptr) { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + case CL_MEM_OBJECT_IMAGE2D: + image = new (context) amd::Image(*buffer->asBuffer(), + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 1 : imageDepth, + imageRowPitch, + imageSlicePitch); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 
1 : imageDepth, + imageWidth * imageFormat.getElementSize(), /* row pitch */ + imageWidth * imageHeight * imageFormat.getElementSize(), /* slice pitch */ + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageArraySize, + 1, /* image depth */ + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageHeight, + imageArraySize, + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + default: + ShouldNotReachHere(); + } + } + + if (image == nullptr) { + return nullptr; + } + + if (!image->create(nullptr)) { + delete image; + return nullptr; + } + + return image; +} + +hipError_t ihipArrayCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray, + unsigned int numMipmapLevels) { + // NumChannels specifies the number of packed components per HIP array element; it may be 1, 2, or 4; + if ((pAllocateArray->NumChannels != 1) && + (pAllocateArray->NumChannels != 2) && + (pAllocateArray->NumChannels != 4)) { + return hipErrorInvalidValue; + } + + if ((pAllocateArray->Flags & hipArraySurfaceLoadStore) || + (pAllocateArray->Flags & hipArrayCubemap) || + (pAllocateArray->Flags & hipArrayTextureGather)) { + return hipErrorNotSupported; + } + + const cl_channel_order channelOrder = hip::getCLChannelOrder(pAllocateArray->NumChannels, 0); + const cl_channel_type channelType = hip::getCLChannelType(pAllocateArray->Format, hipReadModeElementType); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + pAllocateArray->Flags); + + amd::Image* image = 
ihipImageCreate(channelOrder, + channelType, + imageType, + pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + // The number of layers is determined by the depth extent. + pAllocateArray->Depth, /* array size */ + 0, /* row pitch */ + 0, /* slice pitch */ + numMipmapLevels, + nullptr /* buffer */); + + if (image == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem memObj = as_cl(image); + *array = new hipArray{reinterpret_cast(memObj)}; + + // It is UB to call hipGet*() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. + // TODO change the hipArray struct in driver_types.h. + (*array)->desc = hip::getChannelFormatDesc(pAllocateArray->NumChannels, pAllocateArray->Format); + (*array)->width = pAllocateArray->Width; + (*array)->height = pAllocateArray->Height; + (*array)->depth = pAllocateArray->Depth; + (*array)->Format = pAllocateArray->Format; + (*array)->NumChannels = pAllocateArray->NumChannels; + + return hipSuccess; +} + +hipError_t hipArrayCreate(hipArray** array, + const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArrayCreate, array, pAllocateArray); + + HIP_ARRAY3D_DESCRIPTOR desc = {pAllocateArray->Width, + pAllocateArray->Height, + 0, /* Depth */ + pAllocateArray->Format, + pAllocateArray->NumChannels, + hipArrayDefault /* Flags */}; + + HIP_RETURN(ihipArrayCreate(array, &desc, 0)); +} + + +hipError_t hipMallocArray(hipArray** array, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + unsigned int flags) { + HIP_INIT_API(hipMallocArray, array, desc, width, height, flags); + + HIP_ARRAY3D_DESCRIPTOR allocateArray = {width, + height, + 0, /* Depth */ + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipArray3DCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { + 
HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); + + HIP_RETURN(ihipArrayCreate(array, pAllocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipMalloc3DArray(hipArray_t* array, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int flags) { + HIP_INIT_API(hipMalloc3DArray, array, desc, extent, flags); + + HIP_ARRAY3D_DESCRIPTOR allocateArray = {extent.width, + extent.height, + extent.depth, + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0)); +} + +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); + + if (flagsPtr == nullptr || + hostPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(hostPtr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *flagsPtr = svmMem->getMemFlags() >> 16; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); + if(hostPtr != nullptr) { + amd::Memory* mem = new (*hip::host_device->asContext()) amd::Buffer(*hip::host_device->asContext(), CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); + + constexpr bool sysMemAlloc = false; + constexpr bool skipAlloc = false; + constexpr bool forceAlloc = true; + if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { + mem->release(); + HIP_RETURN(hipErrorOutOfMemory); + } + + for (const auto& device: hip::getCurrentDevice()->devices()) { + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it + const device::Memory* devMem = mem->getDeviceMemory(*device); + amd::MemObjMap::AddMemObj(reinterpret_cast(devMem->virtualAddress()), mem); + } + + amd::MemObjMap::AddMemObj(hostPtr, mem); + HIP_RETURN(hipSuccess); + } else { + 
HIP_RETURN(ihipMalloc(&hostPtr, sizeBytes, flags));
+  }
+}
+
+// Undoes a host registration (or frees an SVM allocation) for hostPtr.
+//
+// First finishes the null stream of every device in g_devices and syncs that
+// device's streams. Then either frees hostPtr through amd::SvmBuffer::free()
+// when it is an SVM allocation, or removes the memory object found by
+// getMemoryObject() from amd::MemObjMap (per-device addresses and hostPtr)
+// and releases it. Returns hipErrorInvalidValue when hostPtr is neither.
+hipError_t hipHostUnregister(void* hostPtr) {
+  HIP_INIT_API(hipHostUnregister, hostPtr);
+
+  for (auto& dev : g_devices) {
+    amd::HostQueue* queue = hip::getNullStream(*dev->asContext());
+    if (queue != nullptr) {
+      queue->finish();
+    }
+    hip::syncStreams(dev->deviceId());
+  }
+
+  if (amd::SvmBuffer::malloced(hostPtr)) {
+    amd::SvmBuffer::free(*hip::host_device->asContext(), hostPtr);
+    HIP_RETURN(hipSuccess);
+  } else {
+    size_t offset = 0;
+    amd::Memory* mem = getMemoryObject(hostPtr, offset);
+
+    if(mem) {
+      for (const auto& device: hip::getCurrentDevice()->devices()) {
+        const device::Memory* devMem = mem->getDeviceMemory(*device);
+        // NOTE(review): the cast's template argument was lost in extraction;
+        // void* matches the RemoveMemObj(hostPtr) call below - confirm.
+        amd::MemObjMap::RemoveMemObj(reinterpret_cast<void*>(devMem->virtualAddress()));
+      }
+      amd::MemObjMap::RemoveMemObj(hostPtr);
+      mem->release();
+      HIP_RETURN(hipSuccess);
+    }
+  }
+
+  HIP_RETURN(hipErrorInvalidValue);
+}
+
+// Deprecated function:
+// Forwards to ihipMalloc() with the caller's flags unchanged.
+hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
+  // Every other public entry point here opens with HIP_INIT_API before its
+  // HIP_RETURN; this one did not.
+  HIP_INIT_API(hipHostAlloc, ptr, sizeBytes, flags);
+
+  HIP_RETURN(ihipMalloc(ptr, sizeBytes, flags));
+}
+
+
+// Resolves `symbol` to a device address on the current device and checks that
+// the byte range [offset, offset + count) lies inside the symbol.
+//
+// On success stores (symbol base + offset) in *device_ptr and returns
+// hipSuccess. Returns hipErrorInvalidSymbol when the symbol or its global
+// variable cannot be found, hipErrorInvalidDevicePointer when the range does
+// not fit.
+static hipError_t ihipSymbolRangeAddress(const void* symbol, size_t count, size_t offset,
+                                         hipDeviceptr_t* device_ptr) {
+  std::string symbolName;
+  if (!PlatformState::instance().findSymbol(symbol, symbolName)) {
+    return hipErrorInvalidSymbol;
+  }
+
+  /* Get address and size for the global symbol */
+  size_t sym_size = 0;
+  hipDeviceptr_t base = nullptr;
+  if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr,
+                                              &base, &sym_size)) {
+    return hipErrorInvalidSymbol;
+  }
+
+  /* Range check, written so that offset + count cannot wrap around. Partial
+     copies are valid; only a range that runs past the symbol is rejected. */
+  if ((offset > sym_size) || (count > (sym_size - offset))) {
+    return hipErrorInvalidDevicePointer;
+  }
+
+  *device_ptr = static_cast<char*>(base) + offset;
+  return hipSuccess;
+}
+
+// Copies count bytes from src into `symbol`, starting offset bytes into it.
+hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count,
+                             size_t offset, hipMemcpyKind kind) {
+  HIP_INIT_API(hipMemcpyToSymbol, symbol, src, count, offset, kind);
+
+  hipDeviceptr_t device_ptr = nullptr;
+  const hipError_t status = ihipSymbolRangeAddress(symbol, count, offset, &device_ptr);
+  if (status != hipSuccess) {
+    HIP_RETURN(status);
+  }
+
+  /* Copy memory from source to destination address */
+  HIP_RETURN(hipMemcpy(device_ptr, src, count, kind));
+}
+
+// Copies count bytes out of `symbol`, starting offset bytes into it, to dst.
+hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count,
+                               size_t offset, hipMemcpyKind kind) {
+  // Arguments are listed in signature order (dst before symbol).
+  HIP_INIT_API(hipMemcpyFromSymbol, dst, symbol, count, offset, kind);
+
+  hipDeviceptr_t device_ptr = nullptr;
+  const hipError_t status = ihipSymbolRangeAddress(symbol, count, offset, &device_ptr);
+  if (status != hipSuccess) {
+    HIP_RETURN(status);
+  }
+
+  /* Copy memory from source to destination address */
+  HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind));
+}
+
+// Same as hipMemcpyToSymbol(), but forwards to hipMemcpyAsync() on `stream`.
+hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t count,
+                                  size_t offset, hipMemcpyKind kind, hipStream_t stream) {
+  HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, count, offset, kind, stream);
+
+  hipDeviceptr_t device_ptr = nullptr;
+  const hipError_t status = ihipSymbolRangeAddress(symbol, count, offset, &device_ptr);
+  if (status != hipSuccess) {
+    HIP_RETURN(status);
+  }
+
+  /* Copy memory from source to destination address */
+  HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream));
+}
+
+// Same as hipMemcpyFromSymbol(), but forwards to hipMemcpyAsync() on `stream`.
+hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count,
+                                    size_t offset, hipMemcpyKind kind, hipStream_t stream) {
+  // Arguments are listed in signature order (dst before symbol).
+  HIP_INIT_API(hipMemcpyFromSymbolAsync, dst, symbol, count, offset, kind, stream);
+
+  hipDeviceptr_t device_ptr = nullptr;
+  const hipError_t status = ihipSymbolRangeAddress(symbol, count, offset, &device_ptr);
+  if (status != hipSuccess) {
+    HIP_RETURN(status);
+  }
+
+  /* Copy memory from source to destination address */
+  HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream));
+}
+
+// Blocking host-to-device copy on the queue returned by hip::getQueue(nullptr).
+hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice,
+                         void* srcHost,
+                         size_t ByteCount) {
+  HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount);
+
+  HIP_RETURN(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *hip::getQueue(nullptr)));
+}
+
+// Blocking device-to-host copy on the queue returned by hip::getQueue(nullptr).
+hipError_t hipMemcpyDtoH(void* dstHost,
+                         hipDeviceptr_t srcDevice,
+                         size_t ByteCount) {
+  HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, ByteCount);
+
+  HIP_RETURN(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *hip::getQueue(nullptr)));
+}
+
+// Blocking device-to-device copy on the queue returned by hip::getQueue(nullptr).
+hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
+                         hipDeviceptr_t srcDevice,
+                         size_t ByteCount) {
+  HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount);
+
+  HIP_RETURN(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *hip::getQueue(nullptr)));
+}
+
+// Copy on the queue for `stream`, passing isAsync = true to ihipMemcpy().
+hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
+                          hipMemcpyKind kind, hipStream_t stream) {
+  HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream);
+
+  amd::HostQueue* queue = hip::getQueue(stream);
+
+  HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue, true));
+}
+
+// Host-to-device copy on the queue for `stream`, with isAsync = true.
+hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice,
+                              void* srcHost,
+                              size_t ByteCount,
+                              hipStream_t stream) {
+  HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream);
+
+  HIP_RETURN(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *hip::getQueue(stream), true));
+}
+
+// Device-to-device copy on the queue for `stream`, with isAsync = true.
+hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice,
+                              hipDeviceptr_t srcDevice,
+                              size_t ByteCount,
+                              hipStream_t stream) {
+  HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream);
+
+  HIP_RETURN(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *hip::getQueue(stream), true));
+}
+
+hipError_t hipMemcpyDtoHAsync(void* dstHost,
+                              hipDeviceptr_t srcDevice,
+                              size_t 
ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream); + + HIP_RETURN(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *hip::getQueue(stream), true)); +} + +hipError_t ihipMemcpyAtoD(hipArray* srcArray, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (is_valid(srcMemObj) == false) { + return hipErrorInvalidValue; + } + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + size_t dstOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcImage->getRowPitch(), srcImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + const size_t copySizeInBytes = copyRegion[0] * copyRegion[1] * copyRegion[2] * srcImage->getImageFormat().getElementSize(); + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstMemory->validateRegion(dstOrigin, {copySizeInBytes, 0, 0})) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_IMAGE_TO_BUFFER, + amd::Command::EventWaitList{}, + *srcImage, + *dstMemory, + srcOrigin, + dstOrigin, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoA(void* srcDevice, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D 
dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (is_valid(dstMemObj) == false) { + return hipErrorInvalidValue; + } + + size_t srcOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstImage->getRowPitch(), dstImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + const size_t copySizeInBytes = copyRegion[0] * copyRegion[1] * copyRegion[2] * dstImage->getImageFormat().getElementSize(); + if (!srcMemory->validateRegion(srcOrigin, {copySizeInBytes, 0, 0}) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_BUFFER_TO_IMAGE, + amd::Command::EventWaitList{}, + *srcMemory, + *dstImage, + srcOrigin, + dstOrigin, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoD(void* srcDevice, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + size_t dstOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dstDevice, 
dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_BUFFER_RECT, + amd::Command::EventWaitList{}, + *srcMemory, + *dstMemory, + srcStart, + dstStart, + copyRegion, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyDtoH(void* srcDevice, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 
1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::ReadMemoryCommand* command = new amd::ReadMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_READ_BUFFER_RECT, + amd::Command::EventWaitList{}, + *srcMemory, + srcStart, + copyRegion, + dstHost, + srcRect, + dstRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoD(const void* srcHost, + void* dstDevice, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + size_t dstOffset = 0; + amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + + amd::WriteMemoryCommand* command = new amd::WriteMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_WRITE_BUFFER_RECT, + amd::Command::EventWaitList{}, + *dstMemory, + dstStart, + copyRegion, + srcHost, + dstRect, + srcRect); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + 
command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoH(const void* srcHost, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + size_t dstRowPitch, + size_t dstSlicePitch) { + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + for (size_t slice = 0; slice < copyRegion[2]; slice++) { + for (size_t row = 0; row < copyRegion[1]; row++) { + const void* srcRow = static_cast(srcHost) + srcRect.start_ + row * srcRect.rowPitch_ + slice * srcRect.slicePitch_; + void* dstRow = static_cast(dstHost) + dstRect.start_ + row * dstRect.rowPitch_ + slice * dstRect.slicePitch_; + std::memcpy(dstRow, srcRow, copyRegion[0]); + } + } + + return hipSuccess; +} + +hipError_t ihipMemcpyAtoA(hipArray* srcArray, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (!is_valid(srcMemObj) || !is_valid(dstMemObj)) { + return hipErrorInvalidValue; + } + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. + // Note that src and dst should have the same element size. 
+ assert(srcImage->getImageFormat().getElementSize() == dstImage->getImageFormat().getElementSize()); + const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::CopyMemoryCommand* command = new amd::CopyMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_COPY_IMAGE, + amd::Command::EventWaitList{}, + *srcImage, + *dstImage, + srcOrigin, + dstOrigin, + copyRegion); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyHtoA(const void* srcHost, + hipArray* dstArray, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t srcRowPitch, + size_t srcSlicePitch, + hipStream_t stream, + bool isAsync = false) { + if (srcHost == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (is_valid(dstMemObj) == false) { + return hipErrorInvalidValue; + } + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
+ const size_t elementSize = dstImage->getImageFormat().getElementSize(); + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + + amd::WriteMemoryCommand* command = new amd::WriteMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_WRITE_IMAGE, + amd::Command::EventWaitList{}, + *dstImage, + dstOrigin, + copyRegion, + static_cast(srcHost) + srcRect.start_, + srcRowPitch, + srcSlicePitch); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyAtoH(hipArray* srcArray, + void* dstHost, + amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, + size_t dstRowPitch, + size_t dstSlicePitch, + hipStream_t stream, + bool isAsync = false) { + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (!is_valid(srcMemObj)) { + return hipErrorInvalidValue; + } + + if (dstHost == nullptr) { + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + + amd::Image* srcImage = as_amd(srcMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
+ const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !srcImage->isRowSliceValid(dstRowPitch, dstSlicePitch, copyRegion[0], copyRegion[1])) { + return hipErrorInvalidValue; + } + + amd::ReadMemoryCommand* command = new amd::ReadMemoryCommand(*hip::getQueue(stream), + CL_COMMAND_READ_IMAGE, + amd::Command::EventWaitList{}, + *srcImage, + srcOrigin, + copyRegion, + static_cast(dstHost) + dstRect.start_, + dstRowPitch, + dstSlicePitch); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, + hipStream_t stream, + bool isAsync = false) { + // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the (unified virtual address space) + // base address of the source data and the bytes per row to apply. {src/dst}Array is ignored. + hipMemoryType srcMemoryType = pCopy->srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + srcMemoryType = amd::MemObjMap::FindMemObj(pCopy->srcDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + hipMemoryType dstMemoryType = pCopy->dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + dstMemoryType = amd::MemObjMap::FindMemObj(pCopy->dstDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + + amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; + amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; + amd::Coord3D copyRegion = {pCopy->WidthInBytes, (pCopy->Height != 0) ? pCopy->Height : 1, (pCopy->Depth != 0) ? pCopy->Depth : 1}; + + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { + // Host to Host. 
+ return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { + // Host to Device. + return ihipMemcpyHtoD(pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { + // Device to Host. + return ihipMemcpyDtoH(pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device. + return ihipMemcpyDtoD(pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { + // Host to Image. + return ihipMemcpyHtoA(pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { + // Image to Host. + return ihipMemcpyAtoH(pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { + // Device to Image. 
+ return ihipMemcpyDtoA(pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { + // Image to Device. + return ihipMemcpyAtoD(pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream, isAsync); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { + // Image to Image. + return ihipMemcpyAtoA(pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, stream, isAsync); + } else { + ShouldNotReachHere(); + } + + return hipSuccess; +} + +hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, + hipStream_t stream, + bool isAsync = false) { + HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*pCopy); + + return ihipMemcpyParam3D(&desc, stream, isAsync); +} + +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = src; + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; // Ignored. + desc.srcPitch = spitch; + + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; // Ignored. 
+ desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(hipMemcpyParam2D, pCopy); + + HIP_RETURN(ihipMemcpyParam2D(pCopy, nullptr)); +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); + + HIP_RETURN(ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, stream, true)); +} + +hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = const_cast(src); + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; + desc.srcPitch = spitch; + + desc.dstXInBytes = wOffset; + desc.dstY = hOffset; + desc.dstMemoryType = hipMemoryTypeArray; + desc.dstHost = nullptr; + desc.dstDevice = nullptr; + desc.dstArray = dst; + desc.dstPitch = 0; // Ignored. 
+ + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, nullptr)); +} + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToArray, dst, wOffset, hOffset, src, count, kind); + + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 0) ? dst->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / dst->width) / hip::getElementSize(dst); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, witdthInBytes, height, kind, nullptr)); +} + +hipError_t ihipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = wOffsetSrc; + desc.srcY = hOffsetSrc; + desc.srcMemoryType = hipMemoryTypeArray; + desc.srcHost = nullptr; + desc.srcDevice = nullptr; + desc.srcArray = const_cast(src); + desc.srcPitch = 0; // Ignored. 
+ + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; + desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromArray, dst, src, wOffsetSrc, hOffset, count, kind); + + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / hip::getElementSize(src); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffset, witdthInBytes, height, kind, nullptr)); +} + +hipError_t hipMemcpyHtoA(hipArray* dstArray, + size_t dstOffset, + const void* srcHost, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, ByteCount); + + HIP_RETURN(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyAtoH(void* dstHost, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, + hipStream_t stream, + bool isAsync = false) { + // The struct passed to hipMemcpy3D() must specify one of srcArray or srcPtr and one of dstArray or dstPtr. + // Passing more than one non-zero source or destination will cause hipMemcpy3D() to return an error. 
+ if (((p->srcArray != nullptr) && (p->srcPtr.ptr != nullptr)) || + ((p->dstArray != nullptr) && (p->dstPtr.ptr != nullptr))) { + return hipErrorInvalidValue; + } + + // If the source and destination are both arrays, hipMemcpy3D() will return an error if they do not have the same element size. + if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) && + (hip::getElementSize(p->dstArray) != hip::getElementSize(p->dstArray))) { + return hipErrorInvalidValue; + } + + const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p); + + return ihipMemcpyParam3D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy3D(const hipMemcpy3DParms* p) { + HIP_INIT_API(hipMemcpy3D, p); + + HIP_RETURN(ihipMemcpy3D(p, nullptr)); +} + +hipError_t hipMemcpy3DAsync(const hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + + HIP_RETURN(ihipMemcpy3D(p, stream, true)); +} + +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { + HIP_INIT_API(hipDrvMemcpy3D, pCopy); + + HIP_RETURN(ihipMemcpyParam3D(pCopy, nullptr)); +} + +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { + HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream); + + HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true)); +} + +hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { + if (sizeBytes == 0) { + // Skip if nothing needs filling. 
+ return hipSuccess; + } + + if (dst == nullptr) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Memory* memory = getMemoryObject(dst, offset); + + if (memory != nullptr) { + // Device memory + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } else { + // Host alloced memory + memset(dst, value, sizeBytes); + } + + return hipSuccess; +} + +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, nullptr)); +} + +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true)); +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { + HIP_INIT_API(hipMemsetD8, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), nullptr)); +} + +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), stream, true)); +} + +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count) { + HIP_INIT_API(hipMemsetD16, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), nullptr)); +} + +hipError_t 
hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), stream, true)); +} + +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(hipMemsetD32, dst, value, count); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), nullptr)); +} + +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); + + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), stream, true)); +} + +hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, + int value, + hipExtent extent, + hipStream_t stream, + bool isAsync = false) { + if (pitchedDevPtr.pitch == extent.width) { + return ihipMemset(pitchedDevPtr.ptr, value, sizeof(int8_t), extent.width * extent.height * extent.depth, stream, isAsync); + } + + // Workaround for cases when pitch > row untill fill kernel will be updated to support pitch. + // Fallback to filling one row at a time. 
+ + amd::HostQueue* queue = hip::getQueue(stream); + + size_t offset = 0; + amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset); + + amd::Coord3D origin(offset); + amd::Coord3D region(pitchedDevPtr.xsize, pitchedDevPtr.ysize, extent.depth); + amd::BufferRect rect; + if (!rect.create(static_cast(origin), static_cast(region), pitchedDevPtr.pitch, 0)) { + return hipErrorInvalidValue; + } + + if (memory != nullptr) { + std::vector commands; + + for (size_t slice = 0; slice < extent.depth; slice++) { + for (size_t row = 0; row < extent.height; row++) { + const size_t rowOffset = rect.offset(0, row, slice); + amd::FillMemoryCommand* command = new amd::FillMemoryCommand(*queue, + CL_COMMAND_FILL_BUFFER, + amd::Command::EventWaitList{}, + *memory->asBuffer(), + &value, + sizeof(int8_t), + amd::Coord3D{rowOffset, 0, 0}, + amd::Coord3D{extent.width, 1, 1}); + + command->enqueue(); + commands.push_back(command); + } + } + + for (auto &command: commands) { + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } + } else { + for (size_t slice = 0; slice < extent.depth; slice++) { + for (size_t row = 0; row < extent.height; row++) { + const size_t rowOffset = rect.offset(0, row, slice); + // Fill the row that starts rowOffset bytes into the allocation; without the offset every iteration refills the first row. + std::memset(static_cast<char*>(pitchedDevPtr.ptr) + rowOffset, value, extent.width); + } + } + } + + return hipSuccess; +} + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); + + HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, nullptr)); +} + +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); + + HIP_RETURN(ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream, true)); +} + +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D,
pitchedDevPtr, value, extent); + + HIP_RETURN(ihipMemset3D(pitchedDevPtr, value, extent, nullptr)); +} + +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, extent, stream); + + HIP_RETURN(ihipMemset3D(pitchedDevPtr, value, extent, stream, false)); +} + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes) { + HIP_INIT_API(hipMemAllocPitch, dptr, pitch, widthInBytes, height, elementSizeBytes); + + HIP_RETURN(hipMallocPitch(dptr, pitch, widthInBytes, height)); +} + +hipError_t hipMemAllocHost(void** ptr, size_t size) { + HIP_INIT_API(hipMemAllocHost, ptr, size); + + HIP_RETURN(hipHostMalloc(ptr, size, 0)); +} + +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { + HIP_INIT_API(hipIpcGetMemHandle, handle, dev_ptr); + + size_t offset = 0; + amd::Memory* amd_mem_obj = nullptr; + device::Memory* dev_mem_obj = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if ((handle == nullptr) || (dev_ptr == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get AMD::Memory object corresponding to this pointer */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Get Device::Memory object pointer */ + dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0],false); + if (dev_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Create an handle for IPC. 
Store the memory size inside the handle */ + ihandle = reinterpret_cast(handle); + dev_mem_obj->IpcCreate(offset, &(ihandle->psize), &(ihandle->ipc_handle)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) { + HIP_INIT_API(hipIpcOpenMemHandle, dev_ptr, &handle, flags); + + amd::Memory* amd_mem_obj = nullptr; + amd::Device* device = nullptr; + ihipIpcMemHandle_t* ihandle = nullptr; + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Call the IPC Attach from Device class */ + device = hip::getCurrentDevice()->devices()[0]; + ihandle = reinterpret_cast(&handle); + + amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Add the memory to the MemObjMap */ + amd::MemObjMap::AddMemObj(*dev_ptr, amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipIpcCloseMemHandle(void* dev_ptr) { + HIP_INIT_API(hipIpcCloseMemHandle, dev_ptr); + + size_t offset = 0; + amd::Device* device = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + hip::syncStreams(); + hip::getNullStream()->finish(); + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get the amd::Memory object */ + amd_mem_obj = getMemoryObject(dev_ptr, offset); + if (amd_mem_obj == nullptr) { + HIP_RETURN(hipErrorInvalidDevicePointer); + } + + /* Call IPC Detach from Device class */ + device = hip::getCurrentDevice()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorNoDevice); + } + + /* Remove the memory from MemObjMap */ + amd::MemObjMap::RemoveMemObj(amd_mem_obj); + + /* detach the memory */ + device->IpcDetach(*amd_mem_obj); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { + HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags); + + size_t offset = 0; + + 
amd::Memory* memObj = getMemoryObject(hostPointer, offset); + if (!memObj) { + HIP_RETURN(hipErrorInvalidValue); + } +*devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0])->virtualAddress() + offset); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + HIP_INIT_API(hipPointerGetAttributes, attributes, ptr); + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + int device = 0; + + if (memObj != nullptr) { + attributes->memoryType = (CL_MEM_SVM_FINE_GRAIN_BUFFER & memObj->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + attributes->hostPointer = memObj->getSvmPtr(); + attributes->devicePointer = memObj->getSvmPtr(); + attributes->isManaged = 0; + attributes->allocationFlags = memObj->getMemFlags() >> 16; + + amd::Context* memObjCtx = &memObj->getContext(); + if (hip::host_device->asContext() == memObjCtx) { + attributes->device = ihipGetDevice(); + HIP_RETURN(hipSuccess); + } + for (auto& ctx : g_devices) { + if (ctx->asContext() == memObjCtx) { + attributes->device = device; + HIP_RETURN(hipSuccess); + } + ++device; + } + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipArrayDestroy(hipArray* array) { + HIP_INIT_API(hipArrayDestroy, array); + + HIP_RETURN(ihipArrayDestroy(array)); +} + +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArray3DGetDescriptor, pArrayDescriptor, array); + + assert(false && "Unimplemented"); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArrayGetDescriptor, pArrayDescriptor, array); + + assert(false && "Unimplemented"); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyParam2DAsync, pCopy, stream); + +
HIP_RETURN(ihipMemcpyParam2D(pCopy, stream, true)); +} + +hipError_t ihipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = wOffsetSrc; + desc.srcY = hOffsetSrc; + desc.srcMemoryType = hipMemoryTypeArray; + desc.srcHost = nullptr; + desc.srcDevice = nullptr; + desc.srcArray = const_cast(src); + desc.srcPitch = 0; // Ignored. + + desc.dstXInBytes = wOffsetDst; + desc.dstY = hOffsetDst; + desc.dstMemoryType = hipMemoryTypeArray; + desc.dstHost = nullptr; + desc.dstDevice = nullptr; + desc.dstArray = dst; + desc.dstPitch = 0; // Ignored. + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + + HIP_RETURN(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +hipError_t hipMemcpyArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + + HIP_RETURN(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DFromArray, dst, 
dpitch, src, wOffsetSrc, hOffset, width, height, kind); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DFromArrayAsync, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream, true)); +} + +hipError_t hipMemcpyFromArrayAsync(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromArrayAsync, dst, src, wOffsetSrc, hOffsetSrc, count, kind, stream); + + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t widthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / hip::getElementSize(src); + + HIP_RETURN(ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffsetSrc, widthInBytes, height, kind, stream, true)); +} + +hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); +} + +hipError_t hipMemcpyToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToArrayAsync, dst, wOffset, hOffset, src, count, kind); + + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 
0) ? dst->height : 1; + const size_t widthInBytes = count / arrayHeight; + + const size_t height = (count / dst->width) / hip::getElementSize(dst); + + HIP_RETURN(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, widthInBytes, height, kind, stream, true)); +} + +hipError_t hipMemcpyAtoA(hipArray* dstArray, + size_t dstOffset, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoA, dstArray, dstOffset, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoA(srcArray, dstArray, {srcOffset, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, nullptr)); +} + +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoD, dstDevice, srcArray, srcOffset, ByteCount); + + HIP_RETURN(ihipMemcpyAtoD(srcArray, dstDevice, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyAtoHAsync(void* dstHost, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyAtoHAsync, dstHost, srcArray, srcOffset, ByteCount, stream); + + HIP_RETURN(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +hipError_t hipMemcpyDtoA(hipArray* dstArray, + size_t dstOffset, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoA, dstArray, dstOffset, srcDevice, ByteCount); + + HIP_RETURN(ihipMemcpyDtoA(srcDevice, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyHtoAAsync(hipArray* dstArray, + size_t dstOffset, + const void* srcHost, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoAAsync, dstArray, dstOffset, srcHost, ByteCount, stream); + + HIP_RETURN(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + 
HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels) { + HIP_INIT_API(hipMipmappedArrayCreate, pHandle, pMipmappedArrayDesc, numMipmapLevels); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t *mipmappedArray, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int numLevels, + unsigned int flags) { + HIP_INIT_API(hipMallocMipmappedArray, mipmappedArray, desc, extent, numLevels, flags); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray) { + HIP_INIT_API(hipMipmappedArrayDestroy, hMipmappedArray); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray) { + HIP_INIT_API(hipFreeMipmappedArray, mipmappedArray); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level) { + HIP_INIT_API(hipMipmappedArrayGetLevel, pLevelArray, hMipMappedArray, level); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipGetMipmappedArrayLevel(hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level) { + HIP_INIT_API(hipGetMipmappedArrayLevel, levelArray, mipmappedArray, level); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMallocHost(void** ptr, + size_t size) { + HIP_INIT_API(hipMallocHost, ptr, size); + + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMalloc(ptr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER)); +} + +hipError_t hipFreeHost(void *ptr) { + HIP_INIT_API(hipFreeHost, ptr); + + HIP_RETURN(ihipFree(ptr)); +} diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp new file mode 100755 index 0000000000..3d40d8c967 --- /dev/null +++ b/vdi/hip_module.cpp @@ -0,0 +1,665 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "hip_event.hpp" +#include "hip_platform.hpp" + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image); + +const std::string& FunctionName(const hipFunction_t f) +{ + return hip::Function::asFunction(f)->function_->name(); +} + +static uint64_t ElfSize(const void *emi) +{ + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) +{ + HIP_INIT_API(hipModuleLoad, module, fname); + + if (!fname) { + HIP_RETURN(hipErrorInvalidValue); + } + + std::ifstream file(fname, std::ios::binary); + + if (!file.is_open()) { + HIP_RETURN(hipErrorFileNotFound); + } + + std::vector tmp{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; + + HIP_RETURN(ihipModuleLoadData(module, tmp.data())); +} + +bool ihipModuleUnregisterGlobal(hipModule_t hmod) { + std::vector< std::pair >* modules = + PlatformState::instance().unregisterVar(hmod); + if (modules != nullptr) { + delete modules; + } + return true; +} + +hipError_t hipModuleUnload(hipModule_t hmod) +{ + HIP_INIT_API(hipModuleUnload, hmod); + + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + if(!PlatformState::instance().unregisterFunc(hmod)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + if(!ihipModuleUnregisterGlobal(hmod)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + 
program->release(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, + unsigned int numOptions, hipJitOption* options, + void** optionsValues) +{ + /* TODO: Pass options to Program */ + HIP_INIT_API(hipModuleLoadData, module, image); + + HIP_RETURN(ihipModuleLoadData(module, image)); +} + +extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs); + +inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { + + std::vector undef_vars; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + return false; + } + + for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + texture* tex_hptr + = new texture(); + memset(tex_hptr, 0x00, sizeof(texture)); + + PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, + std::vector{ g_devices.size()}, true }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +inline bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { + + std::vector func_names; + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + // Get all the global func names from COMGR + if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + return false; + } + + return PlatformState::instance().registerModFuncs(func_names, module); +} + + +inline bool ihipModuleRegisterGlobal(amd::Program* program, 
hipModule_t* module) { + + size_t var_size = 0; + hipDeviceptr_t device_ptr = nullptr; + std::vector var_names; + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + return false; + } + + for (auto it = var_names.begin(); it != var_names.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, + std::vector{ g_devices.size()}, false }; + PlatformState::instance().registerVar(it->c_str(), dvar); + } + + return true; +} + +hipError_t ihipModuleLoadData(hipModule_t *module, const void *image) +{ + std::vector> code_objs; + hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); + if (code_obj_err == hipSuccess) { + image = code_objs[0].first; + } else if(code_obj_err == hipErrorNoBinaryForGpu) { + return code_obj_err; + } + + amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext()); + if (program == NULL) { + return hipErrorOutOfMemory; + } + + program->setVarInfoCallBack(&getSvarInfo); + + if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, ElfSize(image))) { + return hipErrorInvalidKernelFile; + } + + *module = reinterpret_cast(as_cl(program)); + + if (!ihipModuleRegisterGlobal(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if (!ihipModuleRegisterUndefined(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + if(CL_SUCCESS != program->build(hip::getCurrentDevice()->devices(), nullptr, nullptr, nullptr)) { + return hipErrorSharedObjectInitFailed; + } + + if (!ihipModuleRegisterFunc(program, module)) { + return hipErrorSharedObjectSymbolNotFound; + } + + return hipSuccess; +} + 
+hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); + + if (!PlatformState::instance().findModFunc(hfunc, hmod, name)) { + HIP_RETURN(hipErrorNotFound); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) +{ + HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, + dptr, bytes)) { + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + HIP_INIT_API(hipFuncGetAttribute, value, attrib, hfunc); + + if ((value == nullptr) || (hfunc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Function* function = hip::Function::asFunction(hfunc); + if (function == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::Kernel* kernel = function->function_; + if (kernel == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const device::Kernel::WorkGroupInfo* wrkGrpInfo + = kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); + if (wrkGrpInfo == nullptr) { + HIP_RETURN(hipErrorMissingConfiguration); + } + + switch(attrib) { + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_ + - wrkGrpInfo->privateMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + *value = static_cast(wrkGrpInfo->wavefrontPerSIMD_ + * wrkGrpInfo->wavefrontSize_); + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + *value = static_cast(wrkGrpInfo->availableGPRs_); + break; + case 
HIP_FUNC_ATTRIBUTE_PTX_VERSION: + *value = 30; // Defaults to 3.0 as HCC + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + *value = static_cast(kernel->signature().version()); + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->availableLDSSize_); + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + *value = 0; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + HIP_INIT_API(hipFuncGetAttributes, attr, func); + + if (!PlatformState::instance().getFuncAttr(func, attr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(hipSuccess); +} + +// Common launch path used by the public launch entry points in this file. +// gridDim* are passed to the NDRange as global work sizes, blockDim* as local work sizes. +// Kernel arguments come either from kernelParams or from the buffer described by 'extra'. +hipError_t ihipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, + uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) { + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); + + hip::Function* function = hip::Function::asFunction(f); + // Reject a null function handle before it is dereferenced below + if (function == nullptr) { + return hipErrorInvalidHandle; + } + amd::Kernel* kernel = function->function_; + + amd::ScopedLock lock(function->lock_); + + hip::Event* eStart = reinterpret_cast(startEvent); + hip::Event* eStop = reinterpret_cast(stopEvent); + amd::HostQueue* queue = hip::getQueue(hStream); + // The queue must be validated before it is used to look up the device + if (!queue) { + return hipErrorOutOfMemory; + } + const amd::Device& device = queue->vdev()->device(); + + // Make sure dispatch doesn't exceed max workgroup size limit + if (blockDimX * blockDimY * blockDimZ > device.info().maxWorkGroupSize_) { + return
hipErrorInvalidConfiguration; + } + + if (params & amd::NDRangeKernelCommand::CooperativeGroups) { + if (!device.info().cooperativeGroups_) { + return hipErrorLaunchFailure; + } + int num_blocks = 0; + int num_grids = 0; + int block_size = blockDimX * blockDimY * blockDimZ; + hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, block_size, sharedMemBytes, true); + if (((gridDimX * gridDimY * gridDimZ) / block_size) > unsigned(num_grids)) { + return hipErrorCooperativeLaunchTooLarge; + } + } + if (params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) { + if (!device.info().cooperativeMultiDeviceGroups_) { + return hipErrorLaunchFailure; + } + } + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ }; + size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ }; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + + address kernargs = nullptr; + + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + if (extra != nullptr) { + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || + extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorNotInitialized; + } + kernargs = reinterpret_cast
(extra[1]); + } + + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + if (kernelParams == nullptr) { + assert(kernargs != nullptr); + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER/*svmBound*/); + } else { + assert(extra == nullptr); + kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/); + } + } + + amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( + *queue, waitList, *kernel, ndrange, sharedMemBytes, + params, gridId, numGrids, prevGridSum, allGridSum, firstDevice); + if (!command) { + return hipErrorOutOfMemory; + } + + // Capture the kernel arguments + if (CL_SUCCESS != command->captureAndValidate()) { + delete command; + return hipErrorOutOfMemory; + } + + command->enqueue(); + + if(startEvent != nullptr) { + eStart->addMarker(queue, command); + command->retain(); + } + if(stopEvent != nullptr) { + eStop->addMarker(queue, command); + command->retain(); + } + command->release(); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, + void **kernelParams, void **extra) +{ + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); +} + +hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t 
localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) +{ + HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent, flags); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, + localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); +} + + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, + uint32_t gridDimY, uint32_t gridDimZ, + uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, + hipEvent_t stopEvent) +{ + HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); +} + +hipError_t hipLaunchCooperativeKernel(const void* f, + dim3 gridDim, dim3 blockDim, + void 
**kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) +{ + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, + sharedMemBytes, hStream); + + int deviceId = ihipGetDevice(); + hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0, + amd::NDRangeKernelCommand::CooperativeGroups)); +} + +// Shared implementation of the multi-device launch entry points: validates the launch list, +// then launches one kernel per entry on that entry's stream. +// Returns hipErrorInvalidValue for a null list or a device count outside [1, active GPU count]. +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags, uint32_t extFlags) +{ + int numActiveGPUs = 0; + ihipDeviceGetCount(&numActiveGPUs); + + // A non-positive count is invalid; a negative one would also wrap to a huge vector size below + if ((numDevices <= 0) || (numDevices > numActiveGPUs) || (launchParamsList == nullptr)) { + return hipErrorInvalidValue; + } + + hipError_t result = hipErrorUnknown; + uint64_t allGridSize = 0; + std::vector mgpu_list(numDevices); + + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + + // Make sure block dimensions are valid + if (0 == launch.blockDim.x * launch.blockDim.y * launch.blockDim.z) { + return hipErrorInvalidConfiguration; + } + if (launch.stream != nullptr) { + // Validate devices to make sure the list doesn't have duplicates + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + auto device = &queue->vdev()->device(); + for (int j = 0; j < numDevices; ++j) { + if (mgpu_list[j] == device) { + return hipErrorInvalidDevice; + } + } + mgpu_list[i] = device; + } else { + return hipErrorInvalidResourceHandle; + } + } + uint64_t prevGridSize = 0; + uint32_t firstDevice = 0; + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { + for (int i = 0; i < numDevices; ++i) { +
amd::HostQueue* queue = + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + queue->finish(); + } + } + + for (int i = 0; i < numDevices; ++i) { + const hipLaunchParams& launch = launchParamsList[i]; + amd::HostQueue* queue = reinterpret_cast(launch.stream)->asHostQueue(); + hipFunction_t func = nullptr; + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device and request the kernel function + if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + func = PlatformState::instance().getFunc(launch.func, dev); + // Save VDI index of the first device in the launch + if (i == 0) { + firstDevice = queue->vdev()->device().index(); + } + break; + } + } + if (func == nullptr) { + result = hipErrorInvalidDeviceFunction; + HIP_RETURN(result); + } + + result = ihipModuleLaunchKernel(func, + launch.gridDim.x * launch.blockDim.x, + launch.gridDim.y * launch.blockDim.y, + launch.gridDim.z * launch.blockDim.z, + launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, + launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, + flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); + if (result != hipSuccess) { + break; + } + prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z; + } + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + amd::HostQueue* queue = + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + queue->finish(); + } + } + + return result; +} + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) +{ + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, + 
(amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)); +} + +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + + return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0); +} + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); + + /* input args check */ + if ((texRef == nullptr) || (name == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getTexRef(name, hmod, texRef)) { + HIP_RETURN(hipErrorNotFound); + } + + // Texture references created by HIP driver API + // have the default read mode set to normalized float. + (*texRef)->readMode = hipReadModeNormalizedFloat; + + HIP_RETURN(hipSuccess); +} + diff --git a/vdi/hip_peer.cpp b/vdi/hip_peer.cpp new file mode 100644 index 0000000000..225361d525 --- /dev/null +++ b/vdi/hip_peer.cpp @@ -0,0 +1,127 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) { + HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, + size_t sizeBytes) { + HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t canAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId){ + amd::Device* device = nullptr; + amd::Device* peer_device = nullptr; + if (canAccessPeer == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + /* Peer cannot be self */ + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + HIP_RETURN(hipSuccess); + } + /* Cannot exceed the max number of devices */ + if (static_cast(deviceId) >= g_devices.size() + || static_cast(peerDeviceId) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + device = g_devices[deviceId]->devices()[0]; + peer_device = g_devices[peerDeviceId]->devices()[0]; + *canAccessPeer = static_cast(std::find(device->p2pDevices_.begin(), + 
device->p2pDevices_.end(), as_cl(peer_device)) + != device->p2pDevices_.end()); + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceCanAccessPeer(int* canAccess, int deviceId, int peerDeviceId) { + HIP_INIT_API(hipDeviceCanAccessPeer, canAccess, deviceId, peerDeviceId); + HIP_RETURN(canAccessPeer(canAccess, deviceId, peerDeviceId)); +} + +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { + HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId); + int deviceId = hip::getCurrentDevice()->deviceId(); + int canAccess = 0; + if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || (canAccess == 0)) { + HIP_RETURN(hipErrorInvalidDevice); + } + HIP_RETURN(hip::getCurrentDevice()->DisablePeerAccess(peerDeviceId)); +} + +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { + HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags); + int deviceId = hip::getCurrentDevice()->deviceId(); + int canAccess = 0; + if (flags != 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || (canAccess == 0)) { + HIP_RETURN(hipErrorInvalidDevice); + } + HIP_RETURN(hip::getCurrentDevice()->EnablePeerAccess(peerDeviceId)); +} + +hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes) { + HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes); + + HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice)); +} + +hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream)); +} + +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t 
hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx); + + HIP_RETURN(hipSuccess); +} diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp new file mode 100755 index 0000000000..81ba2a2125 --- /dev/null +++ b/vdi/hip_platform.cpp @@ -0,0 +1,1229 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "platform/runtime.hpp" + +#include +#include "elfio.hpp" + +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" + +thread_local std::stack execStack_; +PlatformState* PlatformState::platform_ = new PlatformState(); + +struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; +}; + +#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" +#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" +#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" + +struct __ClangOffloadBundleDesc { + uint64_t offset; + uint64_t size; + uint64_t tripleSize; + const char triple[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1]; + uint64_t numBundles; + __ClangOffloadBundleDesc desc[1]; +}; + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name); + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes); + +static bool isCompatibleCodeObject(const std::string& codeobj_target_id, + const char* device_name) { + // Workaround for device name mismatch. + // Device name may contain feature strings delimited by '+', e.g. + // gfx900+xnack. Currently HIP-Clang does not include feature strings + // in code object target id in fat binary. Therefore drop the feature + // strings from device name before comparing it with code object target id. + std::string short_name(device_name); + auto feature_loc = short_name.find('+'); + if (feature_loc != std::string::npos) { + short_name.erase(feature_loc); + } + return codeobj_target_id == short_name; +} + +// Extracts code objects from fat binary in data for device names given in devices. +// Returns hipErrorInvalidKernelFile if data does not start with the clang offload bundle magic, +// hipSuccess if the number of matched code objects equals the number of devices, +// and hipErrorNoBinaryForGpu otherwise.
+hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs) +{ + std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return hipErrorInvalidKernelFile; + } + + code_objs.resize(devices.size()); + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + unsigned num_code_objs = 0; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::size_t offset = 0; + if (!std::strncmp(desc->triple, HIP_AMDGCN_AMDHSA_TRIPLE, + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1)) { + offset = sizeof(HIP_AMDGCN_AMDHSA_TRIPLE); //For code objects created by CLang + } else if (!std::strncmp(desc->triple, HCC_AMDGCN_AMDHSA_TRIPLE, + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1)) { + offset = sizeof(HCC_AMDGCN_AMDHSA_TRIPLE); //For code objects created by Hcc + } else { + continue; + } + // Skip malformed entries whose triple is too short to carry a target id; + // the unsigned subtraction below would otherwise wrap around + if (desc->tripleSize < offset) { + continue; + } + std::string target(desc->triple + offset, desc->tripleSize - offset); + + const void *image = reinterpret_cast( + reinterpret_cast(obheader) + desc->offset); + size_t size = desc->size; + + for (size_t dev = 0; dev < devices.size(); ++dev) { + const char* name = devices[dev]; + + if (!isCompatibleCodeObject(target, name)) { + continue; + } + // Count each device only once, even if several bundle entries match it; + // the last matching entry still wins, as before + if (code_objs[dev].first == nullptr) { + num_code_objs++; + } + code_objs[dev] = std::make_pair(image, size); + } + } + if (num_code_objs == devices.size()) + return hipSuccess; + else + return hipErrorNoBinaryForGpu; +} + +extern "C" std::vector>* __hipRegisterFatBinary(const void* data) +{ + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + return nullptr; + } + + return PlatformState::instance().addFatBinary(fbwrapper->binary); +} + +void PlatformState::digestFatBinary(const void* data, std::vector>& programs) +{ + if (programs.size() > 0) { + return; + } + +
std::vector> code_objs; + std::vector devices; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + devices.push_back(g_devices[dev]->devices()[0]->info().name_); + } + + if (hipSuccess != __hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) { + return; + } + + programs.resize(g_devices.size()); + + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]->asContext(); + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return; + } + if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); + } + } +} + +void PlatformState::init() +{ + amd::ScopedLock lock(lock_); + + if(initialized_ || g_devices.empty()) { + return; + } + initialized_ = true; + + for (auto& it : modules_) { + digestFatBinary(it.first, it.second); + } + for (auto& it : functions_) { + it.second.functions.resize(g_devices.size()); + } + for (auto& it : vars_) { + it.second.rvars.resize(g_devices.size()); + } +} + +// Releases every per-device function object and the bookkeeping registered for hmod, +// and removes hmod from module_map_. Always returns true, also when hmod is unknown. +bool PlatformState::unregisterFunc(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + auto mod_it = module_map_.find(hmod); + if (mod_it != module_map_.cend()) { + PlatformState::Module* mod_ptr = mod_it->second; + if(mod_ptr != nullptr) { + for (auto func_it = mod_ptr->functions_.begin(); func_it != mod_ptr->functions_.end(); ++func_it) { + PlatformState::DeviceFunction &devFunc = func_it->second; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + if (devFunc.functions[dev] != 0) { + hip::Function* f = reinterpret_cast(devFunc.functions[dev]); + delete f; + } + } + delete devFunc.modules; + } + delete mod_ptr; + } + // Drop the map entry so that no dangling Module pointer stays reachable through hmod + module_map_.erase(mod_it); + } + return true; +} + +std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + std::vector< std::pair >* rmodules = nullptr; + auto it = vars_.begin(); + while (it != vars_.end()) { + DeviceVar& dvar = 
it->second; + if ((*dvar.modules)[0].first == hmod) { + rmodules = dvar.modules; + if (dvar.dyn_undef) { + texture* tex_hptr + = reinterpret_cast *>(dvar.shadowVptr); + delete tex_hptr; + } + vars_.erase(it++); + } else { + ++it; + } + } + return rmodules; +} + +PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) { + DeviceVar* dvar = nullptr; + if (hmod != nullptr) { + // If module is provided, then get the var only from that module + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + if ((*it->second.modules)[deviceId].first == hmod) { + dvar = &(it->second); + break; + } + } + } else { + // If var count is < 2, return the var (or nullptr when there is none) + if (vars_.count(hostVar) < 2) { + auto it = vars_.find(hostVar); + dvar = ((it == vars_.end()) ? nullptr : &(it->second)); + } else { + // If var count is >= 2, return the original (non-shadow) var, + // if original var count != 1, return nullptr (ambiguous or missing) + size_t orig_global_count = 0; + auto var_range = vars_.equal_range(hostVar); + for (auto it = var_range.first; it != var_range.second; ++it) { + // when dyn_undef is set, it is a shadow var + if (it->second.dyn_undef == false) { + ++orig_global_count; + dvar = &(it->second); + } + } + dvar = ((orig_global_count == 1) ? 
dvar : nullptr); + } + } + + return dvar; +} + +bool PlatformState::findSymbol(const void *hostVar, std::string &symbolName) { + auto it = symbols_.find(hostVar); + if (it != symbols_.end()) { + symbolName = it->second; + return true; + } + return false; +} + +void PlatformState::registerVarSym(const void *hostVar, const char *symbolName) { + amd::ScopedLock lock(lock_); + symbols_.insert(std::make_pair(hostVar, std::string(symbolName))); +} + +void PlatformState::registerVar(const char* hostvar, + const DeviceVar& rvar) { + amd::ScopedLock lock(lock_); + vars_.insert(std::make_pair(std::string(hostvar), rvar)); +} + +void PlatformState::registerFunction(const void* hostFunction, + const DeviceFunction& func) { + amd::ScopedLock lock(lock_); + functions_.insert(std::make_pair(hostFunction, func)); +} + +bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + const auto it = dev_program->kernels().find(std::string(func_name)); + if (it == dev_program->kernels().cend()) { + return false; + } + + const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo(); + func_attr->localSizeBytes = wginfo->localMemSize_; + func_attr->sharedSizeBytes = wginfo->size_; + func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_; + func_attr->numRegs = wginfo->usedVGPRs_; + + return true; +} + +bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size) { + DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod); + if (dvar != nullptr) { + *var_addr = dvar->shadowVptr; + *var_size = dvar->size; + return true; + } else { + return false; + } +} + +bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, + size_t* var_size) { + return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast(program), + var_addr, var_size); +} 
+ +bool PlatformState::registerModFuncs(std::vector& func_names, hipModule_t* module) { + amd::ScopedLock lock(lock_); + PlatformState::Module* mod_ptr = new PlatformState::Module(*module); + + for (auto it = func_names.begin(); it != func_names.end(); ++it) { + auto modules = new std::vector >(g_devices.size()); + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + modules->at(dev) = std::make_pair(*module, true); + } + + PlatformState::DeviceFunction dfunc{*it, modules, + std::vector(g_devices.size(), 0)}; + mod_ptr->functions_.insert(std::make_pair(*it, dfunc)); + } + + module_map_.insert(std::make_pair(*module, mod_ptr)); + return true; +} + +bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { + amd::ScopedLock lock(lock_); + + auto mod_it = module_map_.find(hmod); + if (mod_it != module_map_.cend()) { + auto func_it = mod_it->second->functions_.find(name); + if (func_it != mod_it->second->functions_.cend()) { + PlatformState::DeviceFunction& devFunc = func_it->second; + if (devFunc.functions[ihipGetDevice()] == 0) { + if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) { + return false; + } + } + *hfunc = devFunc.functions[ihipGetDevice()]; + return true; + } + } + return false; +} + +bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol* symbol = program->findSymbol(name); + if (!symbol) { + return false; + } + + amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); + if (!kernel) { + return false; + } + + hip::Function* f = new hip::Function(kernel); + *hfunc = f->asHipFunction(); + + return true; +} + + +hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { + PlatformState::DeviceFunction& devFunc = it->second; + if 
(devFunc.functions[deviceId] == 0) { + hipModule_t module = (*devFunc.modules)[deviceId].first; + if (!(*devFunc.modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast(module)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return nullptr; + } + (*devFunc.modules)[deviceId].second = true; + } + hipFunction_t function = nullptr; + if (createFunc(&function, module, devFunc.deviceName.c_str()) && + function != nullptr) { + devFunc.functions[deviceId] = function; + } + else { + // tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + // " device %d\n", deviceName, deviceId); + } + } + return devFunc.functions[deviceId]; + } + return nullptr; +} + +bool PlatformState::getFuncAttr(const void* hostFunction, + hipFuncAttributes* func_attr) { + if (func_attr == nullptr) { + return false; + } + + const auto it = functions_.find(hostFunction); + if (it == functions_.cend()) { + return false; + } + + PlatformState::DeviceFunction& devFunc = it->second; + int deviceId = ihipGetDevice(); + + /* If module has not been initialized yet, build the kernel now*/ + if (!(*devFunc.modules)[deviceId].second) { + if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + return false; + } + } + + amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); + if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + return false; + } + return true; +} + +bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod); + if (dvar == nullptr) { + return false; + } + + if (!dvar->dyn_undef) { + return false; + } + + *texRef = new (dvar->shadowVptr) texture{}; + + return true; +} + +bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t 
hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod); + if (dvar != nullptr) { + if (dvar->rvars[deviceId].getdeviceptr() == nullptr) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + if (!(*dvar->modules)[deviceId].second) { + amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + return false; + } + (*dvar->modules)[deviceId].second = true; + } + if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first, + &amd_mem_obj, &device_ptr, &sym_size)) + && (device_ptr != nullptr)) { + dvar->rvars[deviceId].size_ = sym_size; + dvar->rvars[deviceId].devicePtr_ = device_ptr; + dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; + amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); + } else { + LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + } + } + *size_ptr = dvar->rvars[deviceId].getvarsize(); + *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); + return true; + } else { + return false; + } +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); +} + +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} + +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} + +extern "C" void __hipRegisterFunction( + std::vector >* modules, + const void* hostFunction, + char* deviceFunction, + const char* deviceName, + unsigned 
int threadLimit, + uint3* tid, + uint3* bid, + dim3* blockDim, + dim3* gridDim, + int* wSize) +{ + PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; + PlatformState::instance().registerFunction(hostFunction, func); +// for (size_t i = 0; i < g_devices.size(); ++i) { +// PlatformState::instance().getFunc(hostFunction, i); +// } +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. +extern "C" void __hipRegisterVar( + std::vector >* modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + size_t size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 +{ + PlatformState::DeviceVar dvar{var, std::string{ hostVar }, size, modules, + std::vector{g_devices.size()}, false }; + + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, deviceVar); +} + +extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) +{ + HIP_INIT(); + + std::for_each(modules->begin(), modules->end(), [](std::pair module){ + if (module.first != nullptr) { + as_amd(reinterpret_cast(module.first))->release(); + } + }); + if (modules->size() > 0) { + PlatformState::instance().unregisterVar((*modules)[0].first); + } + PlatformState::instance().removeFatBinary(modules); +} + +extern "C" hipError_t hipConfigureCall( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, 
blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPushCallConfiguration( + dim3 gridDim, + dim3 blockDim, + size_t sharedMem, + hipStream_t stream) +{ + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream) { + HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + *gridDim = exec.gridDim_; + *blockDim = exec.blockDim_; + *sharedMem = exec.sharedMem_; + *stream = exec.hStream_; + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipSetupArgument( + const void *arg, + size_t size, + size_t offset) +{ + HIP_INIT_API(NONE, arg, size, offset); + + PlatformState::instance().setupArgument(arg, size, offset); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) +{ + HIP_INIT_API(NONE, hostFunction); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + + hip::Stream* stream = reinterpret_cast(exec.hStream_); + int deviceId = (stream != nullptr)? 
stream->device->deviceId() : ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + size_t size = exec.arguments_.size(); + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + HIP_RETURN(hipModuleLaunchKernel(func, + exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra)); +} + +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + size_t size = 0; + if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + devPtr, &size)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); + + std::string symbolName; + if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + hipDeviceptr_t devPtr = nullptr; + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + &devPtr, sizePtr)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + HIP_RETURN(hipSuccess); +} + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes) +{ + HIP_INIT(); + + amd::Program* program = nullptr; + device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if 
(dev_program == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + /* Find the global Symbols */ + if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + + HIP_RETURN(hipSuccess); +} + + +namespace hip_impl { +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, int* numGrids, + const amd::Device& device, hipFunction_t func, int blockSize, + size_t dynamicSMemSize, bool bCalcPotentialBlkSz) +{ + hip::Function* function = hip::Function::asFunction(func); + const amd::Kernel& kernel = *function->function_; + + const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo(); + if (blockSize == 0) { + if (bCalcPotentialBlkSz == false){ + return hipErrorInvalidValue; + } + else { + blockSize = device.info().maxWorkGroupSize_; // maxwavefrontperblock + } + } + + // Make sure the requested block size is smaller than max supported + if (blockSize > int(device.info().maxWorkGroupSize_)) { + numBlocks = 0; + numGrids = 0; + return hipSuccess; + } + + // Find threads accupancy per CU => simd_per_cu * GPR usage + constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD + size_t VgprWaves = MaxWavesPerSimd; + if (wrkGrpInfo->usedVGPRs_ > 0) { + VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4); + } + + size_t GprWaves = VgprWaves; + if (wrkGrpInfo->usedSGPRs_ > 0) { + const size_t maxSGPRs = (device.info().gfxipVersion_ < 800) ? 
512 : 800; + size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); + GprWaves = std::min(VgprWaves, SgprWaves); + } + + size_t alu_accupancy = device.info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves); + alu_accupancy *= wrkGrpInfo->wavefrontSize_; + // Calculate blocks occupancy per CU + *numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_); + + size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; + if (total_used_lds != 0) { + // Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds) + int lds_occupancy = static_cast(device.info().localMemSize_ / total_used_lds); + *numBlocks = std::min(*numBlocks, lds_occupancy); + } + + if (bCalcPotentialBlkSz) { + *numGrids = *numBlocks * device.info().numRTCUs_; + } + + return hipSuccess; +} +} + +extern "C" { +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit) +{ + HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, func, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == 
nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, int blockSize, + size_t dynSharedMemPerBlk, unsigned int flags) +{ + 
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* f, int blockSize, size_t dynamicSMemSize) +{ + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, func, blockSize, dynamicSMemSize, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* f, + int blockSize, size_t dynamicSMemSize, unsigned int flags) +{ + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); + if (func == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, func, blockSize, dynamicSMemSize, false); + *numBlocks 
= num_blocks; + HIP_RETURN(ret); +} +} + +#if defined(ATI_OS_LINUX) + +namespace hip_impl { + +struct dl_phdr_info { + ELFIO::Elf64_Addr dlpi_addr; + const char *dlpi_name; + const ELFIO::Elf64_Phdr *dlpi_phdr; + ELFIO::Elf64_Half dlpi_phnum; +}; + +extern "C" int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data +); + +struct Symbol { + std::string name; + ELFIO::Elf64_Addr value = 0; + ELFIO::Elf_Xword size = 0; + ELFIO::Elf_Half sect_idx = 0; + uint8_t bind = 0; + uint8_t type = 0; + uint8_t other = 0; +}; + +inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) { + assert(idx < section.get_symbols_num()); + + Symbol r; + section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); + + return r; +} + +template +inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) { + const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p)); + + return it != reader.sections.end() ? 
*it : nullptr; +} + +std::vector> function_names_for(const ELFIO::elfio& reader, + ELFIO::section* symtab) { + std::vector> r; + ELFIO::symbol_section_accessor symbols{reader, symtab}; + + for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { + auto tmp = read_symbol(symbols, i); + + if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) { + r.emplace_back(tmp.value, tmp.name); + } + } + + return r; +} + +const std::vector>& function_names_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + + static std::vector> r; + static std::once_flag f; + + std::call_once(f, []() { + ELFIO::elfio reader; + + if (reader.load(self)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) r = function_names_for(reader, it); + } + }); + + return r; +} + + +const std::unordered_map& function_names() +{ + static std::unordered_map r{ + function_names_for_process().cbegin(), + function_names_for_process().cend()}; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) { + ELFIO::elfio reader; + + if (reader.load(info->dlpi_name)) { + const auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; }); + + if (it) { + auto n = function_names_for(reader, it); + + for (auto&& f : n) f.first += info->dlpi_addr; + + r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end())); + } + } + return 0; + }, + nullptr); + }); + + return r; +} + +std::vector bundles_for_process() { + static constexpr const char self[] = "/proc/self/exe"; + static constexpr const char kernel_section[] = ".kernel"; + std::vector r; + + ELFIO::elfio reader; + + if (reader.load(self)) { + auto it = find_section_if( + reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; }); + + if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size()); + } + + return 
r; +} + +const std::vector& modules() { + static std::vector r; + static std::once_flag f; + + std::call_once(f, []() { + static std::vector> bundles{bundles_for_process()}; + + dl_iterate_phdr( + [](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + if (tmp.load(info->dlpi_name)) { + const auto it = find_section_if( + tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; }); + + if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size()); + } + return 0; + }, + nullptr); + + for (auto&& bundle : bundles) { + if (bundle.empty()) { + continue; + } + std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) + continue; + + const auto obheader = reinterpret_cast(&bundle[0]); + const auto* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1); + if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE)) + continue; + + std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE), + desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE)); + + if (isCompatibleCodeObject(target, hip::getCurrentDevice()->devices()[0]->info().name_)) { + hipModule_t module; + if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast( + reinterpret_cast(obheader) + desc->offset))) + r.push_back(module); + break; + } + } + } + }); + + return r; +} + +const std::unordered_map& functions() +{ + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& function : function_names()) { + for (auto&& module : modules()) { + hipFunction_t f; + if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) { + r[function.first] = f; + } + } + } + }); + + return r; +} + + +void hipLaunchKernelGGLImpl( + uintptr_t function_address, + const dim3& 
numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + const auto it = functions().find(function_address); + if (it == functions().cend()) + assert(0); + + hipModuleLaunchKernel(it->second, + numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, + sharedMemBytes, stream, nullptr, kernarg); +} + +void hipLaunchCooperativeKernelGGLImpl( + uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) +{ + HIP_INIT(); + + hipLaunchCooperativeKernel(reinterpret_cast(function_address), + numBlocks, dimBlocks, kernarg, sharedMemBytes, stream); +} + +} + +#endif // defined(ATI_OS_LINUX) + +extern "C" hipError_t hipLaunchKernel(const void *hostFunction, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMemBytes, + hipStream_t stream) +{ + HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, + stream); + + hip::Stream* s = reinterpret_cast(stream); + int deviceId = (s != nullptr)? 
s->device->deviceId() : ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); + if (func == nullptr) { +#ifdef ATI_OS_LINUX + const auto it = hip_impl::functions().find(reinterpret_cast(hostFunction)); + if (it == hip_impl::functions().cend()) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + func = it->second; +#else + HIP_RETURN(hipErrorInvalidDeviceFunction); +#endif + } + + HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z, + blockDim.x, blockDim.y, blockDim.z, + sharedMemBytes, stream, args, nullptr)); +} + +// conversion routines between float and half precision +static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + +// half float, the f16 is in the low 16 bits of the input argument +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U; + u = (a & 0x7fff) != 0 ? v : u; + return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/; +} + +// float half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 
0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1-e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? i : v; + return s | v; +} + +extern "C" float __gnu_h2f_ieee(unsigned short h){ + return __convert_half_to_float((std::uint32_t) h); +} + +extern "C" unsigned short __gnu_f2h_ieee(float f){ + return (unsigned short)__convert_float_to_half(f); +} diff --git a/vdi/hip_platform.hpp b/vdi/hip_platform.hpp new file mode 100644 index 0000000000..8e5eaa191f --- /dev/null +++ b/vdi/hip_platform.hpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ +#pragma once + +#include "device/device.hpp" + +namespace hip_impl { +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, int* numGrids, + const amd::Device& device, hipFunction_t func, int blockSize, + size_t dynamicSMemSize, bool bCalcPotentialBlkSz); +} \ No newline at end of file diff --git a/vdi/hip_prof_api.h b/vdi/hip_prof_api.h new file mode 100644 index 0000000000..ff81fb7cf5 --- /dev/null +++ b/vdi/hip_prof_api.h @@ -0,0 +1,250 @@ +/* Copyright (c) 2019-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIP_SRC_HIP_PROF_API_H +#define HIP_SRC_HIP_PROF_API_H + +#include +#include +#include + +#if USE_PROF_API +#include "hip/hcc_detail/hip_prof_str.h" +#include "platform/prof_protocol.h" + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(CB_ID) \ + api_callbacks_spawner_t __api_tracer; \ + { \ + hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \ + if (api_data != NULL) { \ + hip_api_data_t& api_data_ref = *api_data; \ + INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \ + __api_tracer.call(); \ + } \ + } + +static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API; +typedef activity_record_t hip_api_record_t; +typedef activity_rtapi_callback_t hip_api_callback_t; +typedef activity_sync_callback_t hip_act_callback_t; + +class api_callbacks_table_t { + public: + typedef std::mutex mutex_t; + + typedef hip_api_record_t record_t; + typedef hip_api_callback_t fun_t; + typedef hip_act_callback_t act_t; + + // HIP API callbacks table + struct hip_cb_table_entry_t { + volatile std::atomic sync; + volatile std::atomic sem; + act_t act; + void* a_arg; + fun_t fun; + void* arg; + }; + + struct hip_cb_table_t { + hip_cb_table_entry_t arr[HIP_API_ID_NUMBER]; + }; + + api_callbacks_table_t() { + memset(&callbacks_table_, 0, sizeof(callbacks_table_)); + } + + bool set_activity(uint32_t id, act_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].act = fun; + callbacks_table_.arr[id].a_arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + bool set_callback(uint32_t id, fun_t fun, void* arg) { + std::lock_guard lock(mutex_); + bool ret = true; + + if (id < HIP_API_ID_NUMBER) { + cb_sync(id); + callbacks_table_.arr[id].fun = fun; + callbacks_table_.arr[id].arg = arg; + enabled_ = true; + cb_release(id); + } else { + ret = false; + } + + return ret; + } + + void set_enabled(const bool& enabled) { + 
enabled_ = enabled; + } + + inline hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table_.arr[id]; + } + + inline void sem_sync(const uint32_t& id) { + sem_increment(id); + if (entry(id).sync.load() == true) sync_wait(id); + } + + inline void sem_release(const uint32_t& id) { + sem_decrement(id); + } + + inline bool is_enabled() const { + return enabled_; + } + + private: + inline void cb_sync(const uint32_t& id) { + entry(id).sync.store(true); + while (entry(id).sem.load() != 0) {} + } + + inline void cb_release(const uint32_t& id) { + entry(id).sync.store(false); + } + + inline void sem_increment(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_add(1); + if (prev == UINT32_MAX) { + std::cerr << "sem overflow id = " << id << std::endl << std::flush; + abort(); + } + } + + inline void sem_decrement(const uint32_t& id) { + const uint32_t prev = entry(id).sem.fetch_sub(1); + if (prev == 0) { + std::cerr << "sem corrupted id = " << id << std::endl << std::flush; + abort(); + } + } + + void sync_wait(const uint32_t& id) { + sem_decrement(id); + while (entry(id).sync.load() == true) {} + sem_increment(id); + } + + mutex_t mutex_; + hip_cb_table_t callbacks_table_; + bool enabled_; +}; + +extern api_callbacks_table_t callbacks_table; + +template +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() : + api_data_(NULL) + { + if (!is_enabled()) return; + + if (cid_ >= HIP_API_ID_NUMBER) { + fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_); + abort(); + } + callbacks_table.sem_sync(cid_); + + hip_act_callback_t act = entry(cid_).act; + if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, NULL); + } + + void call() { + hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + if (fun != NULL) { + fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + api_data_->phase = ACTIVITY_API_PHASE_EXIT; + } + } + + ~api_callbacks_spawner_t() { + if (!is_enabled()) return; + + if (api_data_ != NULL) { 
+ hip_api_callback_t fun = entry(cid_).fun; + void* arg = entry(cid_).arg; + hip_act_callback_t act = entry(cid_).act; + void* a_arg = entry(cid_).a_arg; + if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg); + if (act != NULL) act(cid_, NULL, NULL, a_arg); + } + + callbacks_table.sem_release(cid_); + } + + hip_api_data_t* get_api_data_ptr() { + return api_data_; + } + + bool is_enabled() const { + return callbacks_table.is_enabled(); + } + + private: + inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) { + return callbacks_table.entry(id); + } + + hip_api_data_t* api_data_; +}; + +template <> +class api_callbacks_spawner_t { + public: + api_callbacks_spawner_t() {} + void call() {} + hip_api_data_t* get_api_data_ptr() { return NULL; } + bool is_enabled() const { return false; } +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0) + +class api_callbacks_table_t { + public: + typedef void* act_t; + typedef void* fun_t; + bool set_activity(uint32_t id, act_t fun, void* arg) { return false; } + bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; } +}; + +#endif + +#endif // HIP_SRC_HIP_PROF_API_H diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py new file mode 100755 index 0000000000..04f92e0a00 --- /dev/null +++ b/vdi/hip_prof_gen.py @@ -0,0 +1,612 @@ +#!/usr/bin/python + +# Copyright (c) 2019-present Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import os, sys, re + +PROF_HEADER = "hip_prof_str.h" +OUTPUT = PROF_HEADER +REC_MAX_LEN = 1024 + +# Recursive sources processing +recursive_mode = 0 +# HIP_INIT_API macro patching +hip_patch_mode = 0 +# API matching types check +types_check_mode = 0 +# Private API check +private_check_mode = 0 + +# Messages and errors controll +verbose = 0 +errexit = 0 +inp_file = 'none' +line_num = -1 + +# Verbose message +def message(msg): + if verbose: sys.stdout.write(msg + '\n') + +# Fatal error termination +def error(msg): + if line_num != -1: + msg += ", file '" + inp_file + "', line (" + str(line_num) + ")" + if errexit: + msg = " Error: " + msg + else: + msg = " Warning: " + msg + + sys.stdout.write(msg + '\n') + sys.stderr.write(sys.argv[0] + msg +'\n') + +def fatal(msg): + error(msg) + sys.exit(1) + +############################################################# +# Normalizing API name +def filtr_api_name(name): + name = re.sub(r'\s*$', r'', name); + return name + +def filtr_api_decl(record): + record = re.sub("\s__dparm\([^\)]*\)", '', record); + record = re.sub("\(void\*\)", '', record); + return record + +# Normalizing API arguments +def filtr_api_args(args_str): + args_str = re.sub(r'^\s*', r'', args_str); + args_str = re.sub(r'\s*$', r'', args_str); + args_str = re.sub(r'\s*,\s*', r',', args_str); + args_str = re.sub(r'\s+', r' ', args_str); + #args_str = re.sub(r'void \*', r'void* ', args_str); + args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); + args_str = re.sub(r'(enum|struct) ', '', args_str); + return args_str + +# Normalizing types +def norm_api_types(type_str): + type_str = re.sub(r'uint32_t', r'unsigned int', type_str) + type_str = re.sub(r'^unsigned$', r'unsigned int', type_str) + return type_str + +# Creating a list of arguments [(type, name), ...] 
+def list_api_args(args_str): + args_str = filtr_api_args(args_str) + args_list = [] + if args_str != '': + for arg_pair in args_str.split(','): + if arg_pair == 'void': continue + arg_pair = re.sub(r'\s*=\s*\S+$','', arg_pair); + m = re.match("^(.*)\s(\S+)$", arg_pair); + if m: + arg_type = norm_api_types(m.group(1)) + arg_name = m.group(2) + args_list.append((arg_type, arg_name)) + else: + fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'") + return args_list; + +# Creating arguments string "type0, type1, ..." +def filtr_api_types(args_str): + args_list = list_api_args(args_str) + types_str = '' + for arg_tuple in args_list: + types_str += arg_tuple[0] + ', ' + return types_str + +# Creating options list [opt0, opt1, ...] +def filtr_api_opts(args_str): + args_list = list_api_args(args_str) + opts_list = [] + for arg_tuple in args_list: + opts_list.append(arg_tuple[1]) + return opts_list +############################################################# +# Parsing API header +# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); +def parse_api(inp_file_p, out): + global inp_file + global line_num + inp_file = inp_file_p + + beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\("); + api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)"); + end_pattern = re.compile("Texture"); + hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)') + nms_open_pattern = re.compile(r'namespace hip_impl {') + nms_close_pattern = re.compile(r'}') + + inp = open(inp_file, 'r') + + found = 0 + hidden = 0 + nms_level = 0; + record = "" + line_num = -1 + + for line in inp.readlines(): + record += re.sub(r'^\s+', r' ', line[:-1]) + line_num += 1 + + if len(record) > REC_MAX_LEN: + fatal("bad record \"" + record + "\"") + + m = beg_pattern.match(line) + if m: + name = m.group(2) + if hidden != 0: + message("api: " + name + " - hidden") + elif nms_level != 0: + message("api: " + name + " - 
hip_impl") + else: + message("api: " + name) + found = 1 + + if found != 0: + record = re.sub("\s__dparm\([^\)]*\)", '', record); + m = api_pattern.match(record) + if m: + found = 0 + if end_pattern.search(record): break + api_name = filtr_api_name(m.group(2)) + api_args = m.group(3) + if not api_name in out: + out[api_name] = api_args + else: continue + + hidden = 0 + if hidden_pattern.match(line): hidden = 1 + + if nms_open_pattern.match(line): nms_level += 1 + if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1 + if nms_level < 0: + fatal("nms level < 0") + + record = "" + + inp.close() + line_num = -1 +############################################################# +# Parsing API implementation +# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) { +# HIP_INIT_API(hipSetupArgument, arg, size, offset); +# inp_file - input implementation source file +# api_map - input public API map [] => +# out - output map [] => [opt0, opt1, ...] +def parse_content(inp_file_p, api_map, out): + global hip_patch_mode + global types_check_mode + global private_check_mode + global inp_file + global line_num + inp_file = inp_file_p + + # API method begin pattern + beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); + # API declaration pattern + decl_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*;"); + # API definition pattern + api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{"); + # API init macro pattern + init_pattern = re.compile("(^\s*HIP_INIT_API\s*)\((([^,]+)(,.*|)|)(\);|,)\s*$"); + + # Open input file + inp = open(inp_file, 'r') + + # API name + api_name = "" + # Valid public API found flag + api_valid = 0 + + # Input file patched content + content = '' + # Sub content for found API defiition + sub_content = '' + # Current record, accumulating several API definition related lines + record = '' + # Current input file line number + line_num = -1 + # API 
beginning found flag + found = 0 + + # Reading input file + for line in inp.readlines(): + # Accumulating record + record += re.sub(r'^\s+', r' ', line[:-1]) + line_num += 1 + + if len(record) > REC_MAX_LEN: + fatal("bad record \"" + record + "\"") + break; + + # Looking for API begin + if found == 0: + if beg_pattern.match(record): + found = 1 + record = filtr_api_decl(record) + + # Matching API declaration + if found == 1: + if decl_pattern.match(record): + found = 0 + + # Matching API definition + if found == 1: + m = api_pattern.match(record) + # Checking if complete API matched + if m: + found = 2 + api_name = filtr_api_name(m.group(2)) + # Checking if API name is in the API map + if (private_check_mode == 0) or (api_name in api_map): + if not api_name in api_map: api_map[api_name] = '' + # Getting API arguments + api_args = m.group(3) + # Getting etalon arguments from the API map + eta_args = api_map[api_name] + if eta_args == '': + eta_args = api_args + api_map[api_name] = eta_args + # Normalizing API arguments + api_types = filtr_api_types(api_args) + # Normalizing etalon arguments + eta_types = filtr_api_types(eta_args) + if (api_types == eta_types) or ((types_check_mode == 0) and (not api_name in out)): + # API is already found and not is mismatched + if (api_name in out): + fatal("API redefined \"" + api_name + "\", record \"" + record + "\"") + # Set valid public API found flag + api_valid = 1 + # Set output API map with API arguments list + out[api_name] = filtr_api_opts(api_args) + # Register missmatched API methods + else: + # Warning about mismatched API, possible non public overloaded version + api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types + message("\t" + api_name + ' args mismatch:\n' + api_diff + '\n') + + if hip_patch_mode != 0: + # Looking for INIT macro + m = init_pattern.match(line) + if m: + if api_valid == 0: api_name = 'NONE' + + if api_name == m.group(3): + if 
hip_patch_mode == 1: hip_patch_mode = 0 + else: fatal("patching failed") + else: + hip_patch_mode = 2 + init_args = m.group(2) + if init_args != '': init_args = ', ' + init_args + line = m.group(1) + '(' + api_name + init_args + m.group(5) + '\n' + non_public_api = 0 + + # API found action + if found == 2: + # Looking for INIT macro + m = init_pattern.match(line) + if m: + found = 0 + non_public_api = 0 + + if api_valid == 1: + api_valid = 0 + message("\t" + api_name) + else: + non_public_api = 1 + + if non_public_api == 1: + # Registering dummy API for non public API if the name in INIT is not NONE + init_name = m.group(3) + # Ignore if it is initialized as NONE + if init_name != 'NONE': + # Check if init name matching API name + if init_name != api_name: + fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") + # If init name is not in public API map then it is private API + # else it was not identified and will be checked on finish + if not init_name in api_map: + if init_name in out: + fatal("API reinit \"" + api_name + "\", record \"" + record + "\"") + out[init_name] = [] + elif re.search('}', line): + found = 0 + # Expect INIT macro for valid public API + # Removing and registering non-conformant APIs with missing HIP_INIT macro + if api_valid == 1: + api_valid = 0 + if api_name in out: + del out[api_name] + del api_map[api_name] + # Registering non-conformant APIs + out['.' 
+ api_name] = 1 + else: + fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"") + + if found != 1: record = "" + content += line + + inp.close() + line_num = -1 + + if len(out) != 0: + return content + else: + return '' + +# src path walk +def parse_src(api_map, src_path, src_patt, out): + global recursive_mode + + pattern = re.compile(src_patt) + src_path = re.sub(r'\s', '', src_path) + for src_dir in src_path.split(':'): + message("Parsing " + src_dir + " for '" + src_patt + "'") + for root, dirs, files in os.walk(src_dir): + for fnm in files: + if pattern.search(fnm): + file = root + '/' + fnm + message(file) + content = parse_content(file, api_map, out); + if (hip_patch_mode != 0) and (content != ''): + f = open(file, 'w') + f.write(content) + f.close() + if recursive_mode == 0: break +############################################################# +# Generating profiling primitives header +# api_map - public API map [] => [(type, name), ...] +# opts_map - opts map [] => [opt0, opt1, ...] 
+def generate_prof_header(f, api_map, opts_map): + # Private API list + priv_lst = [] + + f.write('// automatically generated sources\n') + f.write('#ifndef _HIP_PROF_STR_H\n'); + f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') + + # Generating dummy macro for non-public API + f.write('\n// Dummy API primitives\n') + f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n') + for name in opts_map: + if not name in api_map: + opts_lst = opts_map[name] + if len(opts_lst) != 0: + fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst)) + f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n') + priv_lst.append(name) + + for name in priv_lst: + message("Private: " + name) + + # Generating the callbacks ID enumaration + f.write('\n// HIP API callbacks ID enumaration\n') + f.write('enum hip_api_id_t {\n') + cb_id = 0 + for name in api_map.keys(): + f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') + cb_id += 1 + f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n') + f.write('\n') + f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n') + for name in priv_lst: + f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n') + f.write('};\n') + + # Generating the callbacks ID enumaration + f.write('\n// Return HIP API string\n') + f.write('inline const char* hip_api_name(const uint32_t id) {\n') + f.write(' switch(id) {\n') + for name in api_map.keys(): + f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n') + f.write(' };\n') + f.write(' return "unknown";\n') + f.write('};\n') + + # Generating the callbacks data structure + f.write('\n// HIP API callbacks data structure\n') + f.write( + 'typedef struct hip_api_data_s {\n' + + ' uint64_t correlation_id;\n' + + ' uint32_t phase;\n' + + ' union {\n' + ) + for name, args in api_map.items(): + if len(args) != 0: + f.write(' struct {\n') + for arg_tuple in args: + if arg_tuple[0] == "hipLimit_t": + f.write(' enum ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') + else: + 
f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') + f.write(' } ' + name + ';\n') + f.write( + ' } args;\n' + + '} hip_api_data_t;\n' + ) + + # Generating the callbacks args data filling macros + f.write('\n// HIP API callbacks args data filling macros\n') + for name, args in api_map.items(): + f.write('// ' + name + str(args) + '\n') + f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n') + if name in opts_map: + opts_list = opts_map[name] + if len(args) != len(opts_list): + fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list)) + # API args iterating: + # type is args[][0] + # name is args[][1] + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_type = arg_tuple[0] + fld_name = arg_tuple[1] + arg_name = opts_list[ind] + f.write(' cb_data.args.' + name + '.' + fld_name + ' = (' + arg_type + ')' + arg_name + '; \\\n') + f.write('};\n') + f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') + + # Generating the method for the API string, name and parameters + f.write('\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' 
+ arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') + + f.write('#endif // _HIP_PROF_STR_H\n'); + +############################################################# +# main +while len(sys.argv) > 1: + if not re.match(r'-', sys.argv[1]): break + + if (sys.argv[1] == '-v'): + verbose = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-r'): + recursive_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-t'): + types_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '--priv'): + private_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-e'): + errexit = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-p'): + hip_patch_mode = 1 + sys.argv.pop(1) + +# Usage +if (len(sys.argv) < 3): + fatal ("Usage: " + sys.argv[0] + " [-v] []\n" + + " -v - verbose messages\n" + + " -r - process source directory recursively\n" + + " -t - API types matching check\n" + + " --priv - private API check\n" + + " -e - on error exit mode\n" + + " -p - HIP_INIT_API macro patching mode\n" + + "\n" + + " Example:\n" + + " $ " + sys.argv[0] + " -v -p -t --priv ./api/hip/include/hip/hcc_detail/hip_runtime_api.h ./api/hip ./api/hip/include/hip/hcc_detail/hip_prof_str.h"); + +# API header file given as an argument +src_pat = "\.cpp$" +api_hfile = sys.argv[1] +if not os.path.isfile(api_hfile): + fatal("input file '" + api_hfile + "' not found") + +# Srcs directory given as an argument +src_dir = sys.argv[2] +if not os.path.isdir(src_dir): + fatal("src directory " + src_dir + "' not found") + +if len(sys.argv) > 3: OUTPUT = sys.argv[3] + +# API declaration map +api_map = { + 'hipSetupArgument': '', + 'hipMalloc3DArray': '', + 'hipFuncGetAttribute': '', + 'hipMemset3DAsync': '', + 'hipKernelNameRef': '', + 'hipStreamGetPriority': '', + 'hipLaunchByPtr': '', + 'hipFreeHost': '', + 'hipGetErrorName': '', + 
'hipMemcpy3DAsync': '', + 'hipMemcpyParam2DAsync': '', + 'hipArray3DCreate': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags': '', + 'hipOccupancyMaxPotentialBlockSize': '', + 'hipMallocManaged': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessor': '', + 'hipGetErrorString': '', + 'hipMallocHost': '', + 'hipModuleLoadDataEx': '', + 'hipGetDeviceProperties': '', + 'hipConfigureCall': '', + 'hipHccModuleLaunchKernel': '', + 'hipExtModuleLaunchKernel': '', +} +# API options map +opts_map = {} + +# Parsing API header +parse_api(api_hfile, api_map) + +# Parsing sources +parse_src(api_map, src_dir, src_pat, opts_map) + +# Checking for non-conformant APIs with missing HIP_INIT macro +for name in list(opts_map.keys()): + m = re.match(r'\.(\S*)', name) + if m: + message("Init missing: " + m.group(1)) + del opts_map[name] + +# Converting api map to map of lists +# Checking for not found APIs +not_found = 0 +if len(opts_map) != 0: + for name in api_map.keys(): + args_str = api_map[name]; + api_map[name] = list_api_args(args_str) + if not name in opts_map: + error("implementation not found: " + name) + not_found += 1 +if not_found != 0: + error(str(not_found) + " API calls missing in interception layer") + +# Generating output header file +with open(OUTPUT, 'w') as f: + generate_prof_header(f, api_map, opts_map) + +# Successfull exit +sys.exit(0) diff --git a/vdi/hip_profile.cpp b/vdi/hip_profile.cpp new file mode 100644 index 0000000000..3422f428ea --- /dev/null +++ b/vdi/hip_profile.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +hipError_t hipProfilerStart() { + HIP_INIT_API(hipProfilerStart); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + + +hipError_t hipProfilerStop() { + HIP_INIT_API(hipProfilerStop); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/vdi/hip_rtc.cpp b/vdi/hip_rtc.cpp new file mode 100644 index 0000000000..9897b98b7f --- /dev/null +++ b/vdi/hip_rtc.cpp @@ -0,0 +1,393 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hiprtc_internal.hpp" +#include +#include "platform/program.hpp" + +namespace hiprtc { +thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS; +} + +class ProgramState { + amd::Monitor lock_; +private: + static ProgramState* programState_; + + ProgramState() : lock_("Guards program state") {} + ~ProgramState() {} +public: + std::unordered_map, std::vector>> progHeaders_; + + std::map> nameExpresssion_; + + static ProgramState& instance(); + void createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames); + void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames); + uint32_t addNameExpression(const char* name_expression); + char* getLoweredName(const char* name_expression); +}; + +ProgramState* ProgramState::programState_ = nullptr; + +ProgramState& ProgramState::instance() { + if (programState_ == nullptr) { + programState_ = new ProgramState; + } + return *programState_; +} + +void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders, + const char** headers, const char** headerNames) { + amd::ScopedLock lock(lock_); + std::vector vHeaderNames; + std::vector vHeaders; + for (auto i = 0; i != numHeaders; ++i) { + vHeaders.emplace_back(headers[i]); + vHeaderNames.emplace_back(headerNames[i]); + progHeaders_[program] = std::make_pair(std::move(vHeaders), std::move(vHeaderNames)); + } +} + +void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders, + char** headers, char ** headerNames) { + amd::ScopedLock lock(lock_); + + const auto it = progHeaders_.find(program); + if (it != progHeaders_.cend()) { + *numHeaders = it->second.first.size(); + *headers = reinterpret_cast(it->second.first.data()); + *headerNames = reinterpret_cast(it->second.second.data()); + } +} + +uint32_t ProgramState::addNameExpression(const char* name_expression) { + amd::ScopedLock lock(lock_); + + // Strip clean of any '(' or ')' or '&' + 
std::string strippedName(name_expression); + if (strippedName.back() == ')') { + strippedName.pop_back(); + strippedName.erase(0, strippedName.find('(')); + } + if (strippedName.front() == '&') { + strippedName.erase(0, 1); + } + auto it = nameExpresssion_.find(name_expression); + if (it == nameExpresssion_.end()) { + nameExpresssion_.insert(std::pair> + (name_expression, std::make_pair(strippedName,""))); + } + return nameExpresssion_.size(); +} + +char* demangle(const char* loweredName) { + if (!loweredName) { + return nullptr; + } +#if __linux__ + int status = 0; + char* demangledName = DEMANGLE(loweredName, nullptr, nullptr, &status); + if (status != 0) { + return nullptr; + } +#elif defined(_WIN32) + char* demangledName = (char*)malloc(UNDECORATED_SIZE); + + if (!UnDecorateSymbolName(loweredName, demangledName, + UNDECORATED_SIZE/ sizeof(*demangledName), UNDNAME_COMPLETE)) + { + free(demangledName); + return nullptr; + } +#else +#error "Only Linux and Windows are supported" +#endif // __linux__ + return demangledName; +} + +static std::string handleMangledName(std::string name) { + std::string loweredName; + char* demangled = demangle(name.c_str()); + loweredName.assign(demangled == nullptr ? std::string() : demangled); + free(demangled); + + if (loweredName.empty()) { + return name; + } + + if (loweredName.find(".kd") != std::string::npos) { + return {}; + } + + if (loweredName.find("void ") == 0) { + loweredName.erase(0, strlen("void ")); + } + + auto dx{loweredName.find_first_of("(<")}; + + if (dx == std::string::npos) { + return loweredName; + } + + if (loweredName[dx] == '<') { + uint32_t count = 1; + do { + ++dx; + count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? 
-1 : 0); + } while (count); + + loweredName.erase(++dx); + } else { + loweredName.erase(dx); + } + + return loweredName; +} + +const char* hiprtcGetErrorString(hiprtcResult x) { + switch (x) { + case HIPRTC_SUCCESS: + return "HIPRTC_SUCCESS"; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return "HIPRTC_ERROR_OUT_OF_MEMORY"; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + case HIPRTC_ERROR_INVALID_INPUT: + return "HIPRTC_ERROR_INVALID_INPUT"; + case HIPRTC_ERROR_INVALID_PROGRAM: + return "HIPRTC_ERROR_INVALID_PROGRAM"; + case HIPRTC_ERROR_INVALID_OPTION: + return "HIPRTC_ERROR_INVALID_OPTION"; + case HIPRTC_ERROR_COMPILATION: + return "HIPRTC_ERROR_COMPILATION"; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + case HIPRTC_ERROR_INTERNAL_ERROR: + return "HIPRTC_ERROR_INTERNAL_ERROR"; + default: + return nullptr; + }; + + ShouldNotReachHere(); + + return nullptr; +} + +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** headerNames) { + HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + if (numHeaders < 0) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + if (numHeaders && (headers == nullptr || headerNames == nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext(), src, amd::Program::HIP); + if (program == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + if 
(CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0])) { + program->release(); + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames); + + *prog = reinterpret_cast(as_cl(program)); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + + // FIXME[skudchad] Add headers to amd::Program::build and device::Program::build, + // pass the saved from ProgramState to amd::Program::build + HIPRTC_INIT_API(prog, numOptions, options); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + std::ostringstream ostrstr; + std::vector oarr(&options[0], &options[numOptions]); + std::copy(oarr.begin(), oarr.end(), std::ostream_iterator(ostrstr, " ")); + + ostrstr.str().append(" -DHIP_VERSION_MAJOR=9"); + ostrstr.str().append(" -DHIP_VERSION_MINOR=0"); + + std::vector devices{hip::getCurrentDevice()->devices()[0]}; + if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + HIPRTC_INIT_API(prog, name_expression); + + if (name_expression == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + amd::Program* program = as_amd(reinterpret_cast(prog)); + + uint32_t id = ProgramState::instance().addNameExpression(name_expression); + + const auto var{"__hiprtc_" + std::to_string(id)}; + const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'}; + + program->appendToSource(code.c_str()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** loweredName) { + HIPRTC_INIT_API(prog, name_expression, loweredName); + + if (name_expression == nullptr || loweredName 
== nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + device::Program* dev_program + = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto it = ProgramState::instance().nameExpresssion_.find(name_expression); + if (it == ProgramState::instance().nameExpresssion_.end()) { + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + } + + std::string strippedName = it->second.first; + std::vector mangledNames; + + if (!dev_program->getLoweredNames(&mangledNames)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + for (auto &name : mangledNames) { + std::string demangledName = handleMangledName(name); + if (demangledName == strippedName) { + it->second.second.assign(name); + } + } + + *loweredName = it->second.second.c_str(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + HIPRTC_INIT_API(prog); + + if (prog == NULL) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + // Release program. 
hiprtcProgram is a double pointer so free *prog + amd::Program* program = as_amd(reinterpret_cast(*prog)); + + program->release(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { + HIPRTC_INIT_API(prog, binaryMem); + + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program::binary_t& binary = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary(); + + ::memcpy(binaryMem, binary.first, binary.second); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { + + HIPRTC_INIT_API(prog, binarySizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + + *binarySizeRet = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary().second; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { + + HIPRTC_INIT_API(prog, dst); + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + log.copy(dst, log.size()); + dst[log.size()] = '\0'; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { + + HIPRTC_INIT_API(prog, logSizeRet); + + amd::Program* program = as_amd(reinterpret_cast(prog)); + const device::Program* devProgram = + program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + auto log = program->programLog() + devProgram->buildLog().c_str(); + + *logSizeRet = log.size() + 1; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcVersion(int* major, int* minor) { + HIPRTC_INIT_API(major, minor); + + if (major == nullptr || minor == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + *major = 9; + *minor = 0; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} diff 
--git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp new file mode 100644 index 0000000000..eac42c0203 --- /dev/null +++ b/vdi/hip_stream.cpp @@ -0,0 +1,274 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_event.hpp" +#include "thread/monitor.hpp" + +static amd::Monitor streamSetLock("Guards global stream set"); +static std::unordered_set streamSet; + +// Internal structure for stream callback handler +class StreamCallback { + public: + StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + amd::Command* command) + : stream_(stream), callBack_(callback), + userData_(userData), command_(command) { + }; + hipStream_t stream_; + hipStreamCallback_t callBack_; + void* userData_; + amd::Command* command_; +}; + +namespace hip { + +void syncStreams(int devId) { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + if (it->device->deviceId() == devId) { + it->finish(); + } + } +} + +void syncStreams() { + syncStreams(getCurrentDevice()->deviceId()); +} + +Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : + queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} + +void Stream::create() { + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + queue = new amd::HostQueue(*device->asContext(), *device->devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, priority); + assert(queue != nullptr); + queue->create(); +} + +amd::HostQueue* Stream::asHostQueue() { + if (queue == nullptr) { + create(); + } + return queue; +} + +void Stream::destroy() { + if (queue != nullptr) { + queue->release(); + queue = nullptr; + } +} + +void Stream::finish() { + if (queue != nullptr) { + queue->finish(); + } +} + +}; + +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { + hipError_t status = hipSuccess; + StreamCallback* cbo = reinterpret_cast(user_data); + { + amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->lock); + cbo->callBack_(cbo->stream_, status, cbo->userData_); + } + cbo->command_->release(); + delete cbo; +} + +static hipError_t 
ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { + hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags); + + if (hStream == nullptr) { + return hipErrorOutOfMemory; + } + + if (!(flags & hipStreamNonBlocking)) { + hip::syncStreams(); + + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(hStream); + } + } + + *stream = reinterpret_cast(hStream); + + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); + + return hipSuccess; +} + +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { + HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); + + HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreate(hipStream_t *stream) { + HIP_INIT_API(hipStreamCreate, stream); + + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); +} + +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); + + if (priority > static_cast(amd::CommandQueue::Priority::High)) { + priority = static_cast(amd::CommandQueue::Priority::High); + } else if (priority < static_cast(amd::CommandQueue::Priority::Normal)) { + priority = static_cast(amd::CommandQueue::Priority::Normal); + } + + return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); +} + +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); + + if (leastPriority != nullptr) { + *leastPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + if (greatestPriority != nullptr) { + // Only report one kind of priority for now. 
+ *greatestPriority = static_cast(amd::CommandQueue::Priority::Normal); + } + return HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { + HIP_INIT_API(hipStreamGetFlags, stream, flags); + + hip::Stream* hStream = reinterpret_cast(stream); + + if(flags != nullptr && hStream != nullptr) { + *flags = hStream->flags; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamSynchronize(hipStream_t stream) { + HIP_INIT_API(hipStreamSynchronize, stream); + + amd::HostQueue* hostQueue = hip::getQueue(stream); + hostQueue->finish(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamDestroy(hipStream_t stream) { + HIP_INIT_API(hipStreamDestroy, stream); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::ScopedLock lock(streamSetLock); + + hip::Stream* hStream = reinterpret_cast(stream); + + hStream->destroy(); + streamSet.erase(hStream); + + delete hStream; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); + + amd::HostQueue* queue; + + if (stream == nullptr) { + queue = hip::getNullStream(); + } else { + queue = reinterpret_cast(stream)->asHostQueue(); + } + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + + return HIP_RETURN(e->streamWait(queue, flags)); +} + +hipError_t hipStreamQuery(hipStream_t stream) { + HIP_INIT_API(hipStreamQuery, stream); + + amd::HostQueue* hostQueue; + if (stream == nullptr) { + hostQueue = hip::getNullStream(); + } else { + hostQueue = reinterpret_cast(stream)->asHostQueue(); + } + + amd::Command* command = hostQueue->getLastQueuedCommand(true); + if (command == nullptr) { + HIP_RETURN(hipSuccess); + } + + amd::Event& event = command->event(); + if (command->type() != 0) { + event.notifyCmdQueue(); + } + hipError_t status = 
(command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady; + command->release(); + HIP_RETURN(status); +} + +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags) { + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); + + amd::HostQueue* hostQueue = reinterpret_cast + (stream)->asHostQueue(); + amd::Command* command = hostQueue->getLastQueuedCommand(true); + if (command == nullptr) { + amd::Command::EventWaitList eventWaitList; + command = new amd::Marker(*hostQueue, false, eventWaitList); + command->enqueue(); + } + amd::Event& event = command->event(); + StreamCallback* cbo = new StreamCallback(stream, callback, userData, command); + + if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast(cbo))) { + command->release(); + return hipErrorInvalidHandle; + } + + event.notifyCmdQueue(); + + HIP_RETURN(hipSuccess); +} + + diff --git a/vdi/hip_surface.cpp b/vdi/hip_surface.cpp new file mode 100644 index 0000000000..5adaf418cf --- /dev/null +++ b/vdi/hip_surface.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" +#include + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + HIP_INIT_API(hipCreateSurfaceObject, pSurfObject, pResDesc); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + HIP_INIT_API(hipDestroySurfaceObject, surfaceObject); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp new file mode 100644 index 0000000000..b837729721 --- /dev/null +++ b/vdi/hip_texture.cpp @@ -0,0 +1,1207 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include +#include "hip_internal.hpp" +#include "hip_conversions.hpp" +#include "platform/sampler.hpp" + +struct __hip_texture { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; + amd::Image* image; + amd::Sampler* sampler; + hipResourceDesc resDesc; + hipTextureDesc texDesc; + hipResourceViewDesc resViewDesc; + + __hip_texture(amd::Image* image_, + amd::Sampler* sampler_, + const hipResourceDesc& resDesc_, + const hipTextureDesc& texDesc_, + const hipResourceViewDesc& resViewDesc_) : + image(image_), + sampler(sampler_), + resDesc(resDesc_), + texDesc(texDesc_), + resViewDesc(resViewDesc_) { + amd::Context& context = *hip::getCurrentDevice()->asContext(); + amd::Device& device = *context.devices()[0]; + + device::Memory* imageMem = image->getDeviceMemory(device); + std::memcpy(imageSRD, imageMem->cpuSrd(), sizeof(imageSRD)); + + device::Sampler* samplerMem = sampler->getDeviceSampler(device); + std::memcpy(samplerSRD, samplerMem->hwState(), sizeof(samplerSRD)); + } +}; + +amd::Image* ihipImageCreate(const cl_channel_order channelOrder, + const cl_channel_type channelType, + const cl_mem_object_type imageType, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer); + +hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + + // pResViewDesc can only be specified if the type of resource is a HIP array or a HIP mipmapped array. 
+ if ((pResViewDesc != nullptr) && + ((pResDesc->resType != hipResourceTypeArray) && (pResDesc->resType != hipResourceTypeMipmappedArray))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeArray, + // hipResourceDesc::res::array::array must be set to a valid HIP array handle. + if ((pResDesc->resType == hipResourceTypeArray) && + (pResDesc->res.array.array == nullptr)) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeMipmappedArray, + // hipResourceDesc::res::mipmap::mipmap must be set to a valid HIP mipmapped array handle + // and hipTextureDesc::normalizedCoords must be set to true. + if ((pResDesc->resType == hipResourceTypeMipmappedArray) && + ((pResDesc->res.mipmap.mipmap == nullptr) || (pTexDesc->normalizedCoords == 0))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeLinear, + // hipResourceDesc::res::linear::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. + // The total number of elements in the linear address range cannot exceed hipDeviceProp::maxTexture1DLinear. + if ((pResDesc->resType == hipResourceTypeLinear) && + ((pResDesc->res.linear.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.linear.devPtr, info.imageBaseAddressAlignment_)) || + ((pResDesc->res.linear.sizeInBytes / hip::getElementSize(pResDesc->res.linear.desc)) >= info.imageMaxBufferSize_))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypePitch2D, + // hipResourceDesc::res::pitch2D::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. + // hipResourceDesc::res::pitch2D::width and hipResourceDesc::res::pitch2D::height specify the width and height of the array in elements, + // and cannot exceed hipDeviceProp::maxTexture2DLinear[0] and hipDeviceProp::maxTexture2DLinear[1] respectively. 
+ // hipResourceDesc::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to hipDeviceProp::texturePitchAlignment. + // Pitch cannot exceed hipDeviceProp::maxTexture2DLinear[2]. + if ((pResDesc->resType == hipResourceTypePitch2D) && + ((pResDesc->res.pitch2D.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.devPtr, info.imageBaseAddressAlignment_)) || + (pResDesc->res.pitch2D.width >= info.image2DMaxWidth_) || + (pResDesc->res.pitch2D.height >= info.image2DMaxHeight_) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.pitchInBytes, info.imagePitchAlignment_)))) { + // TODO check pitch limits. + return hipErrorInvalidValue; + } + + // Mipmaps are currently not supported. + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + return hipErrorNotSupported; + } + // We don't program the border_color_ptr field in the HW sampler SRD. + if (pTexDesc->addressMode[0] == hipAddressModeBorder) { + return hipErrorNotSupported; + } + // We don't program the max_ansio_ratio field in the the HW sampler SRD. + if (pTexDesc->maxAnisotropy != 0) { + return hipErrorNotSupported; + } + // We don't program the lod_bias field in the HW sampler SRD. + if (pTexDesc->mipmapLevelBias != 0) { + return hipErrorNotSupported; + } + // We don't program the min_lod field in the HW sampler SRD. + if (pTexDesc->minMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + // We don't program the max_lod field in the HW sampler SRD. + if (pTexDesc->maxMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + + // TODO VDI assumes all dimensions have the same addressing mode. + cl_addressing_mode addressMode = CL_ADDRESS_NONE; + // If hipTextureDesc::normalizedCoords is set to zero, + // hipAddressModeWrap and hipAddressModeMirror won't be supported + // and will be switched to hipAddressModeClamp. 
+ if ((pTexDesc->normalizedCoords == 0) && + ((pTexDesc->addressMode[0] == hipAddressModeWrap) || (pTexDesc->addressMode[0] == hipAddressModeMirror))) { + addressMode = hip::getCLAddressingMode(hipAddressModeClamp); + } + // hipTextureDesc::addressMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear + else if (pResDesc->resType != hipResourceTypeLinear) { + addressMode = hip::getCLAddressingMode(pTexDesc->addressMode[0]); + } + +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + cl_filter_mode filterMode = CL_FILTER_NONE; +#undef CL_FILTER_NONE + // hipTextureDesc::filterMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear. + if (pResDesc->resType != hipResourceTypeLinear) { + filterMode = hip::getCLFilterMode(pTexDesc->filterMode); + } + +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + cl_filter_mode mipFilterMode = CL_FILTER_NONE; +#undef CL_FILTER_NONE + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + mipFilterMode = hip::getCLFilterMode(pTexDesc->mipmapFilterMode); + } + + amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentDevice()->asContext(), + pTexDesc->normalizedCoords, + addressMode, + filterMode, + mipFilterMode, + pTexDesc->minMipmapLevelClamp, + pTexDesc->maxMipmapLevelClamp); + + if (sampler == nullptr) { + return hipErrorOutOfMemory; + } + + if (!sampler->create()) { + delete sampler; + return hipErrorOutOfMemory; + } + + amd::Image* image = nullptr; + switch (pResDesc->resType) { + case hipResourceTypeArray: { + cl_mem memObj = reinterpret_cast(pResDesc->res.array.array->data); + if (!is_valid(memObj)) { + return hipErrorInvalidValue; + } + image = as_amd(memObj)->asImage(); + + hipTextureReadMode readMode = pTexDesc->readMode; + // 32-bit integer format will not be promoted, regardless of whether or not + // this hipTextureDesc::readMode is set hipReadModeNormalizedFloat is specified. 
+ if ((pResDesc->res.array.array->Format == HIP_AD_FORMAT_SIGNED_INT32) || + (pResDesc->res.array.array->Format == HIP_AD_FORMAT_UNSIGNED_INT32)) { + readMode = hipReadModeElementType; + } + + // We need to create an image view if the user requested to use normalized pixel values, + // due to already having the image created with a different format. + if ((pResViewDesc != nullptr) || + (readMode == hipReadModeNormalizedFloat) || + (pTexDesc->sRGB == 1)) { + // TODO VDI currently right now can only change the format of the image. + const cl_channel_order channelOrder = (pResViewDesc != nullptr) ? hip::getCLChannelOrder(hip::getNumChannels(pResViewDesc->format), pTexDesc->sRGB) : + hip::getCLChannelOrder(pResDesc->res.array.array->NumChannels, pTexDesc->sRGB); + const cl_channel_type channelType = (pResViewDesc != nullptr) ? hip::getCLChannelType(hip::getArrayFormat(pResViewDesc->format), readMode) : + hip::getCLChannelType(pResDesc->res.array.array->Format, readMode); + const amd::Image::Format imageFormat(cl_image_format{channelOrder, channelType}); + if (!imageFormat.isValid()) { + return hipErrorInvalidValue; + } + + image = image->createView(*hip::getCurrentDevice()->asContext(), imageFormat, nullptr); + if (image == nullptr) { + return hipErrorInvalidValue; + } + } + break; + } + case hipResourceTypeMipmappedArray: { + ShouldNotReachHere(); + break; + } + case hipResourceTypeLinear: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.linear.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.linear.desc), pTexDesc->readMode); + const amd::Image::Format imageFormat({channelOrder, channelType}); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pResDesc->resType); + size_t offset = 0; + image = ihipImageCreate(channelOrder, + channelType, + imageType, + (pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize()), /* imageWidth */ + 
0, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + 0, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + getMemoryObject(pResDesc->res.linear.devPtr, offset)); + // TODO take care of non-zero offset. + assert(offset == 0); + if (image == nullptr) { + return hipErrorInvalidValue; + } + break; + } + case hipResourceTypePitch2D: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.pitch2D.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.pitch2D.desc), pTexDesc->readMode); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pResDesc->resType); + size_t offset = 0; + image = ihipImageCreate(channelOrder, + channelType, + imageType, + pResDesc->res.pitch2D.width, /* imageWidth */ + pResDesc->res.pitch2D.height, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + pResDesc->res.pitch2D.pitchInBytes, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + getMemoryObject(pResDesc->res.pitch2D.devPtr, offset)); + // TODO take care of non-zero offset. + assert(offset == 0); + if (image == nullptr) { + return hipErrorInvalidValue; + } + break; + } + } + + void *texObjectBuffer = nullptr; + ihipMalloc(&texObjectBuffer, sizeof(__hip_texture), CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (texObjectBuffer == nullptr) { + return hipErrorOutOfMemory; + } + *pTexObject = new (texObjectBuffer) __hip_texture{image, sampler, *pResDesc, *pTexDesc, (pResViewDesc != nullptr) ? 
*pResViewDesc : hipResourceViewDesc{}}; + + return hipSuccess; +} + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + HIP_INIT_API(hipCreateTextureObject, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + HIP_RETURN(ihipCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + + +hipError_t ihipDestroyTextureObject(hipTextureObject_t texObject) { + if (texObject == nullptr) { + return hipErrorInvalidValue; + } + + const hipResourceType type = texObject->resDesc.resType; + const bool isImageFromBuffer = (type == hipResourceTypeLinear) || (type == hipResourceTypePitch2D); + const bool isImageView = ((type == hipResourceTypeArray) || (type == hipResourceTypeMipmappedArray)) && + !texObject->image->isParent(); + if (isImageFromBuffer || isImageView) { + texObject->image->release(); + } + + // TODO Should call ihipFree() to not polute the api trace. 
+ return hipFree(texObject); +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t texObject) { + HIP_INIT_API(hipDestroyTextureObject, texObject); + + HIP_RETURN(ihipDestroyTextureObject(texObject)); +} + + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, texObject); + + if ((pResDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResDesc = texObject->resDesc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, texObject); + + if ((pResViewDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResViewDesc = texObject->resViewDesc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, texObject); + + if ((pTexDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pTexDesc = texObject->texDesc; + + HIP_RETURN(hipSuccess); +} + +inline bool ihipGetTextureAlignmentOffset(size_t* offset, + const void* devPtr) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + + const char* alignedDevPtr = amd::alignUp(static_cast(devPtr), info.imageBaseAddressAlignment_); + const size_t alignedOffset = alignedDevPtr - static_cast(devPtr); + + // If the device memory pointer was returned from hipMalloc(), + // the offset is guaranteed to be 0 and NULL may be passed as the offset parameter. 
+ if ((alignedOffset != 0) && + (offset == nullptr)) { + return false; + } + + if (offset != nullptr) { + *offset = alignedOffset; + } + + return true; +} + +hipError_t ihipBindTexture(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) { + if ((texref == nullptr) || + (devPtr == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = const_cast(devPtr); + resDesc.res.linear.desc = *desc; + resDesc.res.linear.sizeInBytes = size; + + if (ihipGetTextureAlignmentOffset(offset, devPtr)) { + // Align the user ptr to HW requirments. + resDesc.res.linear.devPtr = static_cast(const_cast(devPtr)) - *offset; + } else { + return hipErrorInvalidValue; + } + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, nullptr); +} + +hipError_t ihipBindTexture2D(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) { + if ((texref == nullptr) || + (devPtr == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = const_cast(devPtr); + resDesc.res.pitch2D.desc = *desc; + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + resDesc.res.pitch2D.pitchInBytes = pitch; + + if (ihipGetTextureAlignmentOffset(offset, devPtr)) { + // Align the user ptr to HW requirments. + resDesc.res.pitch2D.devPtr = static_cast(const_cast(devPtr)) - *offset; + } else { + return hipErrorInvalidValue; + } + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, nullptr); +} + +hipError_t hipBindTexture2D(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) { + HIP_INIT_API(hipBindTexture2D, offset, texref, devPtr, desc, width, height, pitch); + + HIP_RETURN(ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch)); +} + +hipError_t ihipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (array == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToArray, texref, array, desc); + + HIP_RETURN(ihipBindTextureToArray(texref, array, desc)); +} + +hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (mipmappedArray == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ ihipDestroyTextureObject(texref->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeMipmappedArray; + resDesc.res.mipmap.mipmap = const_cast(mipmappedArray); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToMipmappedArray, texref, mipmappedArray, desc); + + HIP_RETURN(ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc)); +} + +hipError_t hipUnbindTexture(const textureReference* texref) { + HIP_INIT_API(hipUnbindTexture, texref); + + if (texref == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const hipTextureObject_t textureObject = texref->textureObject; + const_cast(texref)->textureObject = nullptr; + + HIP_RETURN(ihipDestroyTextureObject(textureObject)); +} + +hipError_t hipBindTexture(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) { + HIP_INIT_API(hipBindTexture, offset, texref, devPtr, desc, size); + + HIP_RETURN(ihipBindTexture(offset, texref, devPtr, desc, size)); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, + hipArray_const_t array) { + HIP_INIT_API(hipGetChannelDesc, desc, array); + + if ((desc == nullptr) || + (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // It is UB to call hipGetChannelDesc() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. 
+ *desc = array->desc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, + const textureReference* texref) { + HIP_INIT_API(hipGetTextureAlignmentOffset, offset, texref); + + if ((offset == nullptr) || + (texref == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO enforce alignment on devPtr. + *offset = 0; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol) { + HIP_INIT_API(hipGetTextureReference, texref, symbol); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipTexRefSetFormat(textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents) { + HIP_INIT_API(hipTexRefSetFormat, texRef, fmt, NumPackedComponents); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->format = fmt; + texRef->numChannels = NumPackedComponents; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFlags(textureReference* texRef, + unsigned int Flags) { + HIP_INIT_API(hipTexRefSetFlags, texRef, Flags); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->readMode = hipReadModeNormalizedFloat; + texRef->normalized = 0; + texRef->sRGB = 0; + + if (Flags & HIP_TRSF_READ_AS_INTEGER) { + texRef->readMode = hipReadModeElementType; + } + + if (Flags & HIP_TRSF_NORMALIZED_COORDINATES) { + texRef->normalized = 1; + } + + if (Flags & HIP_TRSF_SRGB) { + texRef->sRGB = 1; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFilterMode(textureReference* texRef, + hipTextureFilterMode fm) { + HIP_INIT_API(hipTexRefSetFilterMode, texRef, fm); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->filterMode = fm; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam, + const textureReference* texRef, + int dim) { + // TODO overload operator<<(ostream&, textureReference&). 
+ HIP_INIT_API(hipTexRefGetAddressMode, pam, texRef, dim); + + if ((pam == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Currently, the only valid value for dim are 0 and 1. + if ((dim != 0) && (dim != 1)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pam = texRef->addressMode[dim]; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddressMode(textureReference* texRef, + int dim, + hipTextureAddressMode am) { + HIP_INIT_API(hipTexRefSetAddressMode, texRef, dim, am); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if ((dim < 0) || (dim > 2)) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->addressMode[dim] = am; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetArray(hipArray_t* pArray, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetArray, pArray, texRef); + + if ((pArray == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace.
+ hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + HIP_RETURN(error); + } + + switch (resDesc.resType) { + case hipResourceTypeLinear: + case hipResourceTypePitch2D: + case hipResourceTypeMipmappedArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeArray: + *pArray = resDesc.res.array.array; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetArray(textureReference* texRef, + hipArray_const_t array, + unsigned int flags) { + HIP_INIT_API(hipTexRefSetArray, texRef, array, flags); + + if ((texRef == nullptr) || + (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (flags != HIP_TRSA_OVERRIDE_FORMAT) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); +} + +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetAddress, dptr, texRef); + + if ((dptr == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace.
+ hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + HIP_RETURN(error); + } + + switch (resDesc.resType) { + // Need to verify. + // If the texture reference is not bound to any device memory range, + // return hipErrorInvalidValue. + case hipResourceTypeArray: + case hipResourceTypeMipmappedArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeLinear: + *dptr = resDesc.res.linear.devPtr; + break; + case hipResourceTypePitch2D: + *dptr = resDesc.res.pitch2D.devPtr; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetAddress(size_t* ByteOffset, + textureReference* texRef, + hipDeviceptr_t dptr, + size_t bytes) { + HIP_INIT_API(hipTexRefSetAddress, ByteOffset, texRef, dptr, bytes); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = dptr; + resDesc.res.linear.desc = hip::getChannelFormatDesc(texRef->numChannels, texRef->format); + resDesc.res.linear.sizeInBytes = bytes; + + if (ihipGetTextureAlignmentOffset(ByteOffset, dptr)) { + // Align the user ptr to HW requirements.
+ resDesc.res.linear.devPtr = static_cast(dptr) - *ByteOffset; + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); +} + +hipError_t hipTexRefSetAddress2D(textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch) { + HIP_INIT_API(hipTexRefSetAddress2D, texRef, desc, dptr, Pitch); + + if ((texRef == nullptr) || + (desc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = dptr; // resType is Pitch2D, so fill the pitch2D member rather than linear. + resDesc.res.pitch2D.desc = hip::getChannelFormatDesc(desc->NumChannels, desc->Format); // Need to verify. + resDesc.res.pitch2D.width = desc->Width; + resDesc.res.pitch2D.height = desc->Height; + resDesc.res.pitch2D.pitchInBytes = Pitch; + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); +} + +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { + return {x, y, z, w, f}; +} + +hipError_t hipTexRefGetBorderColor(float* pBorderColor, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetBorderColor, pBorderColor, texRef); + + if ((pBorderColor == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO add textureReference::borderColor.
+ assert(false && "textureReference::borderColor is missing in header"); + // std::memcpy(pBorderColor, texRef.borderColor, sizeof(texRef.borderColor)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFilterMode(hipTextureFilterMode* pfm, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFilterMode, pfm, texRef); + + if ((pfm == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pfm = texRef->filterMode; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFlags(unsigned int* pFlags, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFlags, pFlags, texRef); + + if ((pFlags == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pFlags = 0; + + if (texRef->readMode == hipReadModeElementType) { + *pFlags |= HIP_TRSF_READ_AS_INTEGER; + } + + if (texRef->normalized == 1) { + *pFlags |= HIP_TRSF_NORMALIZED_COORDINATES; + } + + if (texRef->sRGB == 1) { + *pFlags |= HIP_TRSF_SRGB; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, + int* pNumChannels, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetFormat, pFormat, pNumChannels, texRef); + + if ((pFormat == nullptr) || + (pNumChannels == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pFormat = texRef->format; + *pNumChannels = texRef->numChannels; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). 
+ HIP_INIT_API(hipTexRefGetMaxAnisotropy, pmaxAnsio, texRef); + + if ((pmaxAnsio == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pmaxAnsio = texRef->maxAnisotropy; + + HIP_RETURN(hipSuccess); // The output was written; report success like the other getters. +} + +hipError_t hipTexRefGetMipmapFilterMode(hipTextureFilterMode* pfm, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapFilterMode, pfm, texRef); + + if ((pfm == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pfm = texRef->mipmapFilterMode; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetMipmapLevelBias(float* pbias, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapLevelBias, pbias, texRef); + + if ((pbias == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pbias = texRef->mipmapLevelBias; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&). + HIP_INIT_API(hipTexRefGetMipmapLevelClamp, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef); + + if ((pminMipmapLevelClamp == nullptr) || + (pmaxMipmapLevelClamp == nullptr) || + (texRef == nullptr)){ + HIP_RETURN(hipErrorInvalidValue); + } + + *pminMipmapLevelClamp = texRef->minMipmapLevelClamp; + *pmaxMipmapLevelClamp = texRef->maxMipmapLevelClamp; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, + const textureReference* texRef) { + // TODO overload operator<<(ostream&, textureReference&).
+ HIP_INIT_API(hipTexRefGetMipmappedArray, pArray, texRef); + + if ((pArray == nullptr) || + (texRef == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = {}; + // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. + hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + HIP_RETURN(error); + } + + switch (resDesc.resType) { + case hipResourceTypeLinear: + case hipResourceTypePitch2D: + case hipResourceTypeArray: + HIP_RETURN(hipErrorInvalidValue); + case hipResourceTypeMipmappedArray: + *pArray = resDesc.res.mipmap.mipmap; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetBorderColor(textureReference* texRef, + float* pBorderColor) { + HIP_INIT_API(hipTexRefSetBorderColor, texRef, pBorderColor); + + if ((texRef == nullptr) || + (pBorderColor == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // TODO add textureReference::borderColor. + assert(false && "textureReference::borderColor is missing in header"); + // std::memcpy(texRef.borderColor, pBorderColor, sizeof(texRef.borderColor)); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef, + unsigned int maxAniso) { + HIP_INIT_API(hipTexRefSetMaxAnisotropy, texRef, maxAniso); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->maxAnisotropy = maxAniso; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef, + hipTextureFilterMode fm) { + HIP_INIT_API(hipTexRefSetMipmapFilterMode, texRef, fm); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->mipmapFilterMode = fm; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef, + float bias) { + HIP_INIT_API(hipTexRefSetMipmapLevelBias, texRef, bias); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + +
texRef->mipmapLevelBias = bias; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp) { + HIP_INIT_API(hipTexRefSetMipmapLevelClamp, texRef, minMipMapLevelClamp, maxMipMapLevelClamp); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + texRef->minMipmapLevelClamp = minMipMapLevelClamp; + texRef->maxMipmapLevelClamp = maxMipMapLevelClamp; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, + hipMipmappedArray* mipmappedArray, + unsigned int Flags) { + HIP_INIT_API(hipTexRefSetMipmappedArray, texRef, mipmappedArray, Flags); + + if ((texRef == nullptr) || + (mipmappedArray == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (Flags != HIP_TRSA_OVERRIDE_FORMAT) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors.
+ ihipDestroyTextureObject(texRef->textureObject); + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeMipmappedArray; + resDesc.res.mipmap.mipmap = mipmappedArray; + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); + + HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); +} + +hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { + HIP_INIT_API(hipTexObjectCreate, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + if ((pTexObject == nullptr) || + (pResDesc == nullptr) || (pTexDesc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipResourceDesc resDesc = hip::getResourceDesc(*pResDesc); + hipTextureDesc texDesc = hip::getTextureDesc(*pTexDesc); + + if (pResViewDesc != nullptr) { + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(*pResViewDesc); + HIP_RETURN(ihipCreateTextureObject(pTexObject, &resDesc, &texDesc, &resViewDesc)); + } else { + HIP_RETURN(ihipCreateTextureObject(pTexObject, &resDesc, &texDesc, nullptr)); + } +} + +hipError_t hipTexObjectDestroy(hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectDestroy, texObject); + + HIP_RETURN(ihipDestroyTextureObject(texObject)); +} + +hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetResourceDesc, pResDesc, texObject); + + if ((pResDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResDesc = hip::getResourceDesc(texObject->resDesc); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + 
hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetResourceViewDesc, pResViewDesc, texObject); + + if ((pResViewDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pResViewDesc = hip::getResourceViewDesc(texObject->resViewDesc); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipTexObjectGetTextureDesc, pTexDesc, texObject); + + if ((pTexDesc == nullptr) || + (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pTexDesc = hip::getTextureDesc(texObject->texDesc); + + HIP_RETURN(hipSuccess); +} diff --git a/vdi/hiprtc_internal.hpp b/vdi/hiprtc_internal.hpp new file mode 100644 index 0000000000..4e533c9716 --- /dev/null +++ b/vdi/hiprtc_internal.hpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIPRTC_SRC_HIP_INTERNAL_H +#define HIPRTC_SRC_HIP_INTERNAL_H + +#include "hip_internal.hpp" + +#if __linux__ +#include + +#if HIPRTC_USE_CXXABI +#include + +#define DEMANGLE abi::__cxa_demangle + +#else +extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer, + size_t *length, int *status); + +#define DEMANGLE __cxa_demangle +#endif //HIPRTC_USE_CXXABI + +#elif defined(_WIN32) +#include +#include + +#define UNDECORATED_SIZE 4096 + +#endif // __linux__ + +// This macro should be called at the beginning of every HIP RTC API. +#define HIPRTC_INIT_API(...) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + HIP_INIT(); + +#define HIPRTC_RETURN(ret) \ + hiprtc::g_lastRtcError = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \ + hiprtcGetErrorString(hiprtc::g_lastRtcError)); \ + return hiprtc::g_lastRtcError; + + +#endif // HIPRTC_SRC_HIP_INTERNAL_H diff --git a/vdi/trace_helper.h b/vdi/trace_helper.h new file mode 100644 index 0000000000..432cabb583 --- /dev/null +++ b/vdi/trace_helper.h @@ -0,0 +1,254 @@ +/* Copyright (c) 2015-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once + +#include +#include +#include +#include +//--- +// Helper functions to convert HIP function arguments into strings. +// Handles POD data types as well as enumerations (ie hipMemcpyKind). +// The implementation uses C++11 variadic templates and template specialization. +// The hipMemcpyKind example below is a good example that shows how to implement conversion for a +// new HSA type. + + +// Handy macro to convert an enumeration to a stringified version of same: +#define CASE_STR(x) \ + case x: \ + return #x; + +inline const char* ihipErrorString(hipError_t hip_error) { + switch (hip_error) { + CASE_STR(hipSuccess); + CASE_STR(hipErrorOutOfMemory); + CASE_STR(hipErrorNotInitialized); + CASE_STR(hipErrorDeinitialized); + CASE_STR(hipErrorProfilerDisabled); + CASE_STR(hipErrorProfilerNotInitialized); + CASE_STR(hipErrorProfilerAlreadyStarted); + CASE_STR(hipErrorProfilerAlreadyStopped); + CASE_STR(hipErrorInvalidImage); + CASE_STR(hipErrorInvalidContext); + CASE_STR(hipErrorContextAlreadyCurrent); + CASE_STR(hipErrorMapFailed); + CASE_STR(hipErrorUnmapFailed); + CASE_STR(hipErrorArrayIsMapped); + CASE_STR(hipErrorAlreadyMapped); + CASE_STR(hipErrorNoBinaryForGpu); + CASE_STR(hipErrorAlreadyAcquired); + CASE_STR(hipErrorNotMapped); + CASE_STR(hipErrorNotMappedAsArray); + CASE_STR(hipErrorNotMappedAsPointer); + CASE_STR(hipErrorECCNotCorrectable); + CASE_STR(hipErrorUnsupportedLimit); + CASE_STR(hipErrorContextAlreadyInUse); + CASE_STR(hipErrorPeerAccessUnsupported); + 
CASE_STR(hipErrorInvalidKernelFile); + CASE_STR(hipErrorInvalidGraphicsContext); + CASE_STR(hipErrorInvalidSource); + CASE_STR(hipErrorFileNotFound); + CASE_STR(hipErrorSharedObjectSymbolNotFound); + CASE_STR(hipErrorSharedObjectInitFailed); + CASE_STR(hipErrorOperatingSystem); + CASE_STR(hipErrorSetOnActiveProcess); + CASE_STR(hipErrorInvalidHandle); + CASE_STR(hipErrorNotFound); + CASE_STR(hipErrorIllegalAddress); + CASE_STR(hipErrorMissingConfiguration); + CASE_STR(hipErrorLaunchFailure); + CASE_STR(hipErrorPriorLaunchFailure); + CASE_STR(hipErrorLaunchTimeOut); + CASE_STR(hipErrorLaunchOutOfResources); + CASE_STR(hipErrorInvalidDeviceFunction); + CASE_STR(hipErrorInvalidConfiguration); + CASE_STR(hipErrorInvalidDevice); + CASE_STR(hipErrorInvalidValue); + CASE_STR(hipErrorInvalidDevicePointer); + CASE_STR(hipErrorInvalidMemcpyDirection); + CASE_STR(hipErrorUnknown); + CASE_STR(hipErrorNotReady); + CASE_STR(hipErrorNoDevice); + CASE_STR(hipErrorPeerAccessAlreadyEnabled); + CASE_STR(hipErrorPeerAccessNotEnabled); + CASE_STR(hipErrorRuntimeMemory); + CASE_STR(hipErrorRuntimeOther); + CASE_STR(hipErrorHostMemoryAlreadyRegistered); + CASE_STR(hipErrorHostMemoryNotRegistered); + CASE_STR(hipErrorTbd); + default: + return "hipErrorUnknown"; + }; +}; + +// Building block functions: +template +inline std::string ToHexString(T v) { + std::ostringstream ss; + ss << "0x" << std::hex << v; + return ss.str(); +}; + +template +inline std::string ToString(T* v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +template +inline std::string ToString(T** v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +//--- +// Template overloads for ToString to handle specific types + +// This is the default which works for most types: +template +inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +}; + +template 
<> +inline std::string ToString(hipFunction_t v) { + std::ostringstream ss; + ss << "0x" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipEvent_t specialization. TODO - maybe add an event ID for debug? +template <> +inline std::string ToString(hipEvent_t v) { + std::ostringstream ss; + ss << "event:" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipIpcEventHandle_t +template <> +inline std::string ToString(hipIpcEventHandle_t v) { + std::ostringstream ss; + ss << "ipc event:" << std::hex << static_cast(&v); + return ss.str(); +}; + +// hipStream_t +template <> +inline std::string ToString(hipStream_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "stream:"; + } else { + ss << "stream:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipCtx_t +template <> +inline std::string ToString(hipCtx_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "context:"; + } else { + ss << "context:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipPitchedPtr +template <> +inline std::string ToString(hipPitchedPtr v) { + std::ostringstream ss; + ss << "pitchPtr:" << std::hex << static_cast(v.ptr); + return ss.str(); +}; + +// hipMemcpyKind specialization +template <> +inline std::string ToString(hipMemcpyKind v) { + switch (v) { + CASE_STR(hipMemcpyHostToHost); + CASE_STR(hipMemcpyHostToDevice); + CASE_STR(hipMemcpyDeviceToHost); + CASE_STR(hipMemcpyDeviceToDevice); + CASE_STR(hipMemcpyDefault); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipFuncCache_t v) { + switch (v) { + CASE_STR(hipFuncCachePreferNone); + CASE_STR(hipFuncCachePreferShared); + CASE_STR(hipFuncCachePreferL1); + CASE_STR(hipFuncCachePreferEqual); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipSharedMemConfig v) { + switch (v) { + CASE_STR(hipSharedMemBankSizeDefault); + CASE_STR(hipSharedMemBankSizeFourByte); + 
CASE_STR(hipSharedMemBankSizeEightByte); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipError_t v) { + return ihipErrorString(v); +}; + +// Catch empty arguments case +inline std::string ToString() { return (""); } + + +//--- +// C++11 variadic template - peels off first argument, converts to string, and calls itself again to +// peel the next arg. Strings are automatically separated by comma+space. +template +inline std::string ToString(T first, Args... args) { + return ToString(first) + ", " + ToString(args...); +} + From 727c9f77d8d44cc91c8c4a560e06a8f5c2376a2e Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Thu, 23 Apr 2020 11:21:02 -0500 Subject: [PATCH 109/132] Add __HIP_PLATFORM_HCC__ to hip::host target (#2029) --- hip-config.cmake.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index baa7c1607f..e663450930 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -101,6 +101,11 @@ if (HSA_HEADER-NOTFOUND) message (FATAL_ERROR "HSA header not found! ROCM_PATH environment not set") endif() +# Right now this is only supported for amd platforms +set_target_properties(hip::host PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1" +) + if(HIP_RUNTIME MATCHES "VDI") set_target_properties(hip::amdhip64 PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" From a2768ea55463278eea98864cd813194b6da6d1fc Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 23 Apr 2020 12:21:33 -0400 Subject: [PATCH 110/132] extractkernel obj dump args to use double dashes (#2034) More recent llvm-objdump has changed disassemble and mcpu arguments to require double dashes. 
--- bin/extractkernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/extractkernel b/bin/extractkernel index 871610be06..81760f50de 100755 --- a/bin/extractkernel +++ b/bin/extractkernel @@ -233,7 +233,7 @@ while(1) { my $isa_file_name = "${filename_prefix}-${asic_target}.isa"; # use llvm-objdump to dump out GCN ISA - system("$llvm_objdump -disassemble -mcpu=$asic_target $hsaco_file_name > $isa_file_name") == 0 or die("Fail to disassemble AMDGPU ISA for target" . $asic_target); + system("$llvm_objdump --disassemble --mcpu=$asic_target $hsaco_file_name > $isa_file_name") == 0 or die("Fail to disassemble AMDGPU ISA for target" . $asic_target); if ($debug) { print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n"); From 75a50884466c07387853e6d572790a4f47c9bc80 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 23 Apr 2020 12:21:54 -0400 Subject: [PATCH 111/132] Fix hip-config.cmake for CMAKE_CXX_COMPILER=g++ (#2035) * Fix hip-config.cmake for CMAKE_CXX_COMPILER=g++ Change-Id: I84ae83a5d223853706dd36834b7962ffe9573c1f * Fix HIP_CXX_COMPILER Change-Id: I4e2523b560113420af1ab877a0bc48ca2e9e957a * Fix hip-config.cmake Change-Id: Ida4ef8f40fed83d5f659a9a9835b1f521cb6a374 * Add inlineall option to hip::device in hip-config.cmake Change-Id: Ib51f82d0b9e38d2137df65c940592413e22ba07a --- hip-config.cmake.in | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index e663450930..859e2fa0fc 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -51,8 +51,11 @@ set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") if(HIP_COMPILER STREQUAL "clang") - if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + if(NOT HIP_CXX_COMPILER) + set(HIP_CXX_COMPILER ${CMAKE_CXX_COMPILER}) + endif() + if(HIP_CXX_COMPILER MATCHES 
".*hipcc") + execute_process(COMMAND ${HIP_CXX_COMPILER} --version OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") @@ -60,9 +63,11 @@ if(HIP_COMPILER STREQUAL "clang") else() set(HIP_CLANG_ROOT /opt/rocm/llvm) endif() - else() - get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) + elseif (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") + get_filename_component(HIP_CLANG_ROOT "${HIP_CXX_COMPILER}" PATH) get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) + else() + set(HIP_CLANG_ROOT /opt/rocm/llvm) endif() file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) find_path(HIP_CLANG_INCLUDE_PATH stddef.h @@ -137,6 +142,12 @@ if(HIP_COMPILER STREQUAL "clang") INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib ) + if (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false + ) + endif() + set_property(TARGET hip::device APPEND PROPERTY INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link ) From d0511dfb1618093ed2c00b8d9b26f768ad0d9bda Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Thu, 23 Apr 2020 11:24:51 -0500 Subject: [PATCH 112/132] fix pointers format (#2037) * fix pointers format * fix pointers format - cleanup * fix pointers format - fix --- hip_prof_gen.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hip_prof_gen.py b/hip_prof_gen.py index 38b7aaeb6e..475d28186b 100755 --- a/hip_prof_gen.py +++ b/hip_prof_gen.py @@ -43,7 +43,7 @@ def filtr_api_args(args_str): args_str = re.sub(r'\s*$', r'', args_str); args_str = re.sub(r'\s*,\s*', r',', args_str); args_str = re.sub(r'\s+', r' ', args_str); - args_str = 
re.sub(r'void \*', r'void* ', args_str); + args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); args_str = re.sub(r'(enum|struct) ', '', args_str); return args_str @@ -472,7 +472,8 @@ not_found = 0 if len(opts_map) != 0: for name in api_map.keys(): args_str = api_map[name]; - api_map[name] = list_api_args(args_str) + args_list = list_api_args(args_str) + api_map[name] = args_list if not name in opts_map: error("implementation not found: " + name) not_found += 1 From 8d6347c6b85444af1fb52f60158bbed912dd3ddc Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Thu, 23 Apr 2020 16:38:32 -0400 Subject: [PATCH 113/132] Make sure to zero out all the unset texture fields These might contain garbage causing the runtime to incorrectly parse the state of the texture references. Change-Id: I93c726fa30b580b3e14c50ac939f3c71b0d1c8d9 --- include/hip/hcc_detail/hip_texture_types.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/hip/hcc_detail/hip_texture_types.h b/include/hip/hcc_detail/hip_texture_types.h index e92babfd5a..7c3a0138c1 100644 --- a/include/hip/hcc_detail/hip_texture_types.h +++ b/include/hip/hcc_detail/hip_texture_types.h @@ -65,6 +65,10 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { channelDesc = hipCreateChannelDesc(); sRGB = 0; textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; } texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, @@ -78,6 +82,10 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { channelDesc = desc; sRGB = 0; textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; } }; From b443172d120a5c6ead5de2498bdd397d518e8416 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Wed, 22 Apr 2020 18:29:31 -0400 Subject: [PATCH 114/132] SWDEV - 231874 - Do Dword aligned memset if the total size aligns. 
Change-Id: Id05db4cfd9c43b2cffa3cec8b02f1cd07f340dd6 --- vdi/hip_memory.cpp | 88 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 4178cea93e..bf8c14d461 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -1652,6 +1652,32 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true)); } +hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value, size_t valueSize, + size_t sizeBytes, amd::HostQueue* queue, bool isAsync = false) { + + if ((memory == nullptr) || (queue == nullptr)) { + return hipErrorInvalidValue; + } + + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + + if (!isAsync) { + command->awaitCompletion(); + } + + command->release(); + return hipSuccess; +} + hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, hipStream_t stream, bool isAsync = false) { if (sizeBytes == 0) { @@ -1664,33 +1690,51 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, } size_t offset = 0; - amd::HostQueue* queue = hip::getQueue(stream); amd::Memory* memory = getMemoryObject(dst, offset); - - if (memory != nullptr) { - // Device memory - amd::Command::EventWaitList waitList; - amd::Coord3D fillOffset(offset, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, valueSize, fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - 
command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); - } - command->release(); - } else { + if (memory == nullptr) { // Host alloced memory memset(dst, value, sizeBytes); + return hipSuccess; } - return hipSuccess; + hipError_t hip_error = hipSuccess; + amd::HostQueue* queue = hip::getQueue(stream); + + int32_t value32 = 0; + const size_t dwordModSize = (sizeBytes % sizeof(int32_t)); + + if (sizeBytes/sizeof(int32_t) > 0) { + if (valueSize == sizeof(int8_t)) { + value = value & 0xff; + value32 = ((value << 24) | (value << 16) | (value << 8) | (value)); + } else if (valueSize == sizeof(int16_t)) { + value = value & 0xffff; + value32 = ((value<<16) | (value)); + } else if(valueSize == sizeof(int32_t)) { + value32 = value; + } else { + LogPrintfError("Unsupported Pattern size: %u \n", valueSize); + return hipErrorInvalidValue; + } + // If dwordModSize is != 0 then we will do a second fillBuffer Command + // on the same stream below, dont wait, do the first call async. + hip_error = packFillMemoryCommand(memory, offset, value32, sizeof(int32_t), + sizeBytes - dwordModSize, queue, + ((dwordModSize != 0) || isAsync)); + if(hip_error != hipSuccess) { + return hip_error; + } + } + + if (dwordModSize != 0) { + void* new_dst = reinterpret_cast((reinterpret_cast
(dst) + + sizeBytes) - dwordModSize); + memory = getMemoryObject(new_dst, offset); + hip_error = packFillMemoryCommand(memory, offset, value, valueSize, + dwordModSize, queue, isAsync); + } + + return hip_error; } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { From 04794c6359ed9198d51c6fce69928529efcb599f Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Thu, 23 Apr 2020 17:12:52 -0400 Subject: [PATCH 115/132] [vdi] Fix hang due to recursive locking. - Declare recursive lock for global function map. Change-Id: I3e792e21688c980343c4fd1c61dcad97f7a890cd --- vdi/hip_internal.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 vdi/hip_internal.hpp diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp old mode 100755 new mode 100644 index 3e09df03d4..004a4cf64d --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -179,7 +179,7 @@ struct ihipExec_t { }; class PlatformState { - amd::Monitor lock_{"Guards global function map"}; + amd::Monitor lock_{"Guards global function map", true}; std::unordered_map>> modules_; bool initialized_{false}; From 74ba25602bc7422c23e529093046df627c4f6e73 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Thu, 23 Apr 2020 23:05:28 -0400 Subject: [PATCH 116/132] [vdi] Fix texture reference sample. - The driver code should not re-define `tex` again as it's already defined in the kernel code. Eventually, the driver code should be as regular C++ code instead of HIP code. 
Change-Id: I8c7cab204b98990619d6e7109b990d7089ea9261 --- samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) mode change 100755 => 100644 samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp diff --git a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp old mode 100755 new mode 100644 index b42ac86ad1..01729222ee --- a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp @@ -27,7 +27,6 @@ THE SOFTWARE. #define fileName "tex2dKernel.code" -texture tex; bool testResult = true; #define HIP_CHECK(cmd) \ @@ -122,7 +121,7 @@ bool runTest(int argc, char** argv) { } } } - HIP_CHECK(hipUnbindTexture(tex)); + HIP_CHECK(hipUnbindTexture(texref)); HIP_CHECK(hipFree(dData)); HIP_CHECK(hipFreeArray(array)); return testResult; From e130c3ba4fe45c09b7a41b13b5ba7f17485adb19 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Thu, 23 Apr 2020 14:57:22 -0400 Subject: [PATCH 117/132] Add Performance/memory/hipPerfMemMallocCpyFree Change-Id: Id1ac415f85028da3e9517c2226764bcb4acec1b1 --- .../memory/hipPerfMemMallocCpyFree.cpp | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp diff --git a/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp b/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp new file mode 100644 index 0000000000..f059a564a1 --- /dev/null +++ b/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp @@ -0,0 +1,114 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "test_common.h" +#include +#include + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +#define NUM_SIZE 19 //size up to 16M +#define NUM_ITER 500 //Total GPU memory up to 16M*500=8G + +void valSet(int* A, int val, size_t size) { + size_t len = size / sizeof(int); + for (int i = 0; i < len; i++) { + A[i] = val; + } +} + +void setup(size_t *size, const int num, int **pA) { + std::cout << "size: "; + for (int i = 0; i < num; i++) { + size[i] = 1 << (i + 6); + std::cout << size[i] << " "; + } + std::cout << std::endl; + *pA = (int*)malloc(size[num - 1]); + valSet(*pA, 1, size[num - 1]); +} + +void testInit(size_t size, int *A) { + int *Ad; + clock_t start = clock(); + hipMalloc(&Ad, size); //hip::init() will be called + clock_t end = clock(); + double uS = (end - start) * 1000000. 
/ CLOCKS_PER_SEC; + std::cout << "Initial" << std::endl; + std::cout << "hipMalloc(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipMemcpy(Ad, A, size, hipMemcpyHostToDevice); + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipMemcpy(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipFree(Ad); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipFree(" << size << ") cost " << uS << "us" << std::endl; +} + +int main() { + double uS; + clock_t start, end; + size_t size[NUM_SIZE] = { 0 }; + int *Ad[NUM_ITER] = { nullptr }; + int *A; + + setup(size, NUM_SIZE, &A); + testInit(size[0], A); + + for (int i = 0; i < NUM_SIZE; i++) { + std::cout << size[i] << std::endl; + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMalloc(&Ad[j], size[i])); + } + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMalloc(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMemcpy(Ad[j], A, size[i], hipMemcpyHostToDevice)); + } + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMemcpy(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipFree(Ad[j])); + Ad[j] = nullptr; + } + end = clock(); + double uS = (end - start) * 1000000. 
/ (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipFree(" << size[i] << ") cost " << uS << "us" << std::endl; + } + free(A); + passed(); +} From d568f78bdd92370bc7baf52510f1af2362043e83 Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Thu, 23 Apr 2020 11:21:02 -0500 Subject: [PATCH 118/132] cherrypick from master branch 75a50884466c07387853e6d572790a4f47c9bc80 727c9f77d8d44cc91c8c4a560e06a8f5c2376a2e These are needed by MIOpen and rocBLAS. Add __HIP_PLATFORM_HCC__ to hip::host target (#2029) Fix hip-config.cmake for CMAKE_CXX_COMPILER=g++ (#2035) * Fix hip-config.cmake for CMAKE_CXX_COMPILER=g++ * Add inlineall option to hip::device in hip-config.cmake Change-Id: Ib51f82d0b9e38d2137df65c940592413e22ba07a --- hip-config.cmake.in | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index baa7c1607f..859e2fa0fc 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -51,8 +51,11 @@ set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") if(HIP_COMPILER STREQUAL "clang") - if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + if(NOT HIP_CXX_COMPILER) + set(HIP_CXX_COMPILER ${CMAKE_CXX_COMPILER}) + endif() + if(HIP_CXX_COMPILER MATCHES ".*hipcc") + execute_process(COMMAND ${HIP_CXX_COMPILER} --version OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") @@ -60,9 +63,11 @@ if(HIP_COMPILER STREQUAL "clang") else() set(HIP_CLANG_ROOT /opt/rocm/llvm) endif() - else() - get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) + elseif (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") + get_filename_component(HIP_CLANG_ROOT "${HIP_CXX_COMPILER}" PATH) get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) + else() + 
set(HIP_CLANG_ROOT /opt/rocm/llvm) endif() file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) find_path(HIP_CLANG_INCLUDE_PATH stddef.h @@ -101,6 +106,11 @@ if (HSA_HEADER-NOTFOUND) message (FATAL_ERROR "HSA header not found! ROCM_PATH environment not set") endif() +# Right now this is only supported for amd platforms +set_target_properties(hip::host PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1" +) + if(HIP_RUNTIME MATCHES "VDI") set_target_properties(hip::amdhip64 PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" @@ -132,6 +142,12 @@ if(HIP_COMPILER STREQUAL "clang") INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib ) + if (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false + ) + endif() + set_property(TARGET hip::device APPEND PROPERTY INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link ) From 4143d81618d3674d161e144c02aa1962eab55221 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sun, 19 Apr 2020 12:56:50 -0400 Subject: [PATCH 119/132] Enable template max and min for HIP-Clang This change is required by AMDMIGraphX. It was for HCC only. HIP-Clang also needs it for __fp16 since AMDMIGraphX uses it. 
Change-Id: Id49322b7b89ef799accdf6b47627a6fce51d1ab5 --- include/hip/hcc_detail/math_functions.h | 13 ++++---- tests/src/deviceLib/hipMathFunctions.cpp | 40 +++++++++++++++++++++--- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index 19def9ec7e..c021fb7dec 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -1397,12 +1397,18 @@ float func(float x, int y) \ } __DEF_FLOAT_FUN2I(scalbn) -#if __HCC__ template __DEVICE__ inline static T min(T arg1, T arg2) { return (arg1 < arg2) ? arg1 : arg2; } +template +__DEVICE__ inline static T max(T arg1, T arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +#if __HCC__ + __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) { return min(arg1, (uint32_t) arg2); } @@ -1424,11 +1430,6 @@ __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long lo return min((unsigned long long) arg1, arg2); }*/ -template -__DEVICE__ inline static T max(T arg1, T arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) { return max(arg1, (uint32_t) arg2); } diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index b1b0e8334a..f8c58497dc 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ @@ -146,17 +146,45 @@ void check_abs_int64() { CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]); CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]); CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]); - + // free memories hipFree(inputGPU); hipFree(outputGPU); free(inputCPU); free(outputCPU); - + // done return; } - + + +template +__global__ void kernel_simple(F f, T *out) { + *out = f(); +} + +template +void check_simple(F f, T expected, const char* file, unsigned line) { + auto memsize = sizeof(T); + T *outputCPU = (T *) malloc(memsize); + T *outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + if (*outputCPU != expected) { + failed("%s line %u : check failed (output = %lf, expected = %lf)\n", + file, line, (double)(*outputCPU), (double)expected); + } + hipFree(outputGPU); + free(outputCPU); +} +#define CHECK_SIMPLE(lambda, expected) \ + check_simple(lambda, expected, __FILE__, __LINE__); + +void test_fp16() { + CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f); + CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f); +} int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, argv, true); @@ -164,6 +192,8 @@ int main(int argc, char* argv[]) { check_abs_int64(); // check_lgamma_double(); - + + test_fp16(); + passed(); } From f7f7337baea0a6e4c20e3f0aa236be8ebc7b1c5a Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Thu, 23 Apr 2020 16:54:48 -0400 Subject: [PATCH 120/132] SWDEV-232918 hipEventRecord is much slower in hipclang/vdi - Make sure default streams don't sync each other. - Add null stream into the list of default streams. 
- Code clean-up to simplify queue look-up. Change-Id: I36e1fc8d86a600e3dce806694d95d146ed8afd03 --- vdi/hip_context.cpp | 37 +++------------ vdi/hip_device.cpp | 18 +++----- vdi/hip_internal.hpp | 50 +++++++++++++-------- vdi/hip_memory.cpp | 15 ++----- vdi/hip_platform.cpp | 4 +- vdi/hip_stream.cpp | 105 +++++++++++++++++++------------------------ 6 files changed, 95 insertions(+), 134 deletions(-) diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp index 8869bb07ff..2f75d07b8a 100755 --- a/vdi/hip_context.cpp +++ b/vdi/hip_context.cpp @@ -82,42 +82,17 @@ amd::HostQueue* getQueue(hipStream_t stream) { if (stream == nullptr) { return getNullStream(); } else { - hip::Stream* s = reinterpret_cast(stream); - // Wait for null stream - if ((s->flags & hipStreamNonBlocking) == 0) { - amd::HostQueue* nullStream = getNullStream(); - amd::Command::EventWaitList eventWaitList; - - amd::Command* command = nullStream->getLastQueuedCommand(true); - if ((command != nullptr) && - // Check the current active status - (command->status() != CL_COMPLETE)) { - eventWaitList.push_back(command); - } - - // Check if we have to wait anything - if (eventWaitList.size() > 0) { - amd::Command* command = new amd::Marker(*s->asHostQueue(), false, eventWaitList); - if (command != nullptr) { - command->enqueue(); - command->release(); - } - } - - // Release all active commands. 
It's safe after the marker was enqueued - for (const auto& it : eventWaitList) { - it->release(); - } - } - - return s->asHostQueue(); + constexpr bool WaitNullStreamOnly = true; + amd::HostQueue* queue = reinterpret_cast(stream)->asHostQueue(); + iHipWaitActiveStreams(queue, WaitNullStreamOnly); + return queue; } } amd::HostQueue* getNullStream(amd::Context& ctx) { for (auto& it : g_devices) { if (it->asContext() == &ctx) { - return it->defaultStream(); + return it->NullStream(); } } return nullptr; @@ -125,7 +100,7 @@ amd::HostQueue* getNullStream(amd::Context& ctx) { amd::HostQueue* getNullStream() { Device* device = getCurrentDevice(); - return device ? device->defaultStream() : nullptr; + return device ? device->NullStream() : nullptr; } }; diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index c01dd5f195..5dfc595ee9 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -24,20 +24,14 @@ namespace hip { -amd::HostQueue* Device::defaultStream() { - if (defaultStream_ == nullptr) { - const cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - defaultStream_ = new amd::HostQueue(*asContext(), *devices()[0], properties, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); - if ((defaultStream_ == nullptr) || - !defaultStream_->create()) { - return nullptr; - } +amd::HostQueue* Device::NullStream() { + amd::HostQueue* null_queue = null_stream_.asHostQueue(); + if (null_queue == nullptr) { + return nullptr; } // Wait for all active streams before executing commands on the default - iHipWaitActiveStreams(defaultStream_); - return defaultStream_; + iHipWaitActiveStreams(null_queue); + return null_queue; } }; diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 004a4cf64d..18521c4890 100644 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -78,6 +78,31 @@ class accelerator_view; }; namespace hip { + class Device; + + class Stream { + amd::HostQueue* queue_; + mutable amd::Monitor lock_; + Device* 
device_; + amd::CommandQueue::Priority priority_; + unsigned int flags_; + bool null_; + + public: + Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f = 0, bool null_stream = false); + bool create(); + amd::HostQueue* asHostQueue(); + void destroy(); + void finish() const; + /// Get device ID associated with the current stream; + int DeviceId() const; + /// Returns if stream is null stream + bool Null() const { return null_; } + /// Returns the lock object for the current stream + amd::Monitor& Lock() const { return lock_; } + /// Returns the creation flags for the current stream + unsigned int Flags() const { return flags_; } + }; /// HIP Device class class Device { @@ -85,14 +110,17 @@ namespace hip { /// VDI context amd::Context* context_; /// VDI host queue for default streams - amd::HostQueue* defaultStream_ = nullptr; + Stream null_stream_; /// Device's ID /// Store it here so we don't have to loop through the device list every time int deviceId_; //Maintain list of user enabled peers std::list userEnabledPeers; + public: - Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId) { assert(ctx != nullptr); } + Device(amd::Context* ctx, int devId): + context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true) + { assert(ctx != nullptr); } ~Device() {} amd::Context* asContext() const { return context_; } @@ -119,7 +147,7 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } - amd::HostQueue* defaultStream(); + amd::HostQueue* NullStream(); }; extern std::once_flag g_ihipInitialized; @@ -154,20 +182,6 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; - struct Stream { - amd::HostQueue* queue; - amd::Monitor lock; - Device* device; - amd::CommandQueue::Priority priority; - unsigned int flags; - - Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f); - void create(); - amd::HostQueue* asHostQueue(); - void destroy(); - void 
finish(); - }; - }; struct ihipExec_t { @@ -300,7 +314,7 @@ public: /// Wait all active streams on the blocking queue. The method enqueues a wait command and /// doesn't stall the current thread -extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue); +extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false); extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index bf8c14d461..2b3daab894 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -48,10 +48,7 @@ hipError_t ihipFree(void *ptr) } if (amd::SvmBuffer::malloced(ptr)) { for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } + dev->NullStream()->finish(); } amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); return hipSuccess; @@ -283,10 +280,7 @@ hipError_t ihipArrayDestroy(hipArray* array) { return hipErrorInvalidValue; } for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } + dev->NullStream()->finish(); } as_amd(memObj)->release(); @@ -684,10 +678,7 @@ hipError_t hipHostUnregister(void* hostPtr) { HIP_INIT_API(hipHostUnregister, hostPtr); for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } + dev->NullStream()->finish(); } if (amd::SvmBuffer::malloced(hostPtr)) { diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index d00974c6bc..c5a9099bf5 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -733,7 +733,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) PlatformState::instance().popExec(exec); hip::Stream* stream = reinterpret_cast(exec.hStream_); - int deviceId = (stream != nullptr)? 
stream->device->deviceId() : ihipGetDevice(); + int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice(); if (deviceId == -1) { DevLogPrintfError("Wrong DeviceId: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); @@ -1212,7 +1212,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, stream); hip::Stream* s = reinterpret_cast(stream); - int deviceId = (s != nullptr)? s->device->deviceId() : ihipGetDevice(); + int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice(); if (deviceId == -1) { DevLogPrintfError("Wrong Device Id: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index aefddef17f..b2838c0164 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -42,65 +42,68 @@ class StreamCallback { namespace hip { -void syncStreams() { - amd::ScopedLock lock(streamSetLock); +Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, + unsigned int f, bool null_stream) + : queue_(nullptr), lock_("Stream Callback lock"), device_(dev), + priority_(p), flags_(f), null_(null_stream) {} - for (const auto& it : streamSet) { - if (it->device->deviceId() == getCurrentDevice()->deviceId()) { - it->finish(); - } - } -} - -Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : - queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} - -void Stream::create() { +bool Stream::create() { cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - queue = new amd::HostQueue(*device->asContext(), *device->devices()[0], properties, - amd::CommandQueue::RealTimeDisabled, priority); - assert(queue != nullptr); - queue->create(); + queue_ = new amd::HostQueue(*device_->asContext(), *device_->devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, priority_); + assert(queue_ != nullptr); + return queue_->create(); } amd::HostQueue* Stream::asHostQueue() { - if (queue == nullptr) { - create(); + if (queue_ == nullptr) { + if 
(!create()) { + return nullptr; + } else if (Null()) { + // Make sure the null stream is inserted into the list of default/blocking streams + amd::ScopedLock lock(streamSetLock); + streamSet.insert(this); + } } - return queue; + return queue_; } void Stream::destroy() { - if (queue != nullptr) { - queue->release(); - queue = nullptr; + if (queue_ != nullptr) { + queue_->release(); + queue_ = nullptr; } } -void Stream::finish() { - if (queue != nullptr) { - queue->finish(); +void Stream::finish() const { + if (queue_ != nullptr) { + queue_->finish(); } } +int Stream::DeviceId() const { + return device_->deviceId(); +} + }; -void iHipWaitActiveStreams(amd::HostQueue* blocking_queue) { +void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) { amd::Command::EventWaitList eventWaitList; { amd::ScopedLock lock(streamSetLock); - for (const auto& it : streamSet) { + for (const auto& stream : streamSet) { + amd::HostQueue* active_queue = stream->asHostQueue(); // If it's the current device - if ((it->queue != nullptr) && (&it->queue->device() == &blocking_queue->device()) && - // and it's a blocking streamclan - ((it->flags & hipStreamNonBlocking) == 0) && + if ((active_queue != nullptr) && (&active_queue->device() == &blocking_queue->device()) && // and it's not the current stream - (it->asHostQueue() != blocking_queue)) { + (active_queue != blocking_queue) && + // check for a wait on the null stream + (stream->Null() == wait_null_stream)) { // Get the last valid so command - amd::Command* command = it->asHostQueue()->getLastQueuedCommand(true); + amd::Command* command = active_queue->getLastQueuedCommand(true); if ((command != nullptr) && - // Check the current active status + // Check the current active status (command->status() != CL_COMPLETE)) { eventWaitList.push_back(command); } @@ -127,7 +130,7 @@ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, hipError_t status = hipSuccess; StreamCallback* cbo = 
reinterpret_cast(user_data); { - amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->lock); + amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->Lock()); cbo->callBack_(cbo->stream_, status, cbo->userData_); } cbo->command_->release(); @@ -142,12 +145,8 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: } if (!(flags & hipStreamNonBlocking)) { - hip::syncStreams(); - - { - amd::ScopedLock lock(streamSetLock); - streamSet.insert(hStream); - } + amd::ScopedLock lock(streamSetLock); + streamSet.insert(hStream); } *stream = reinterpret_cast(hStream); @@ -194,13 +193,13 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio return HIP_RETURN(hipSuccess); } -hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { HIP_INIT_API(hipStreamGetFlags, stream, flags); hip::Stream* hStream = reinterpret_cast(stream); - if(flags != nullptr && hStream != nullptr) { - *flags = hStream->flags; + if (flags != nullptr && hStream != nullptr) { + *flags = hStream->Flags(); } else { HIP_RETURN(hipErrorInvalidValue); } @@ -239,13 +238,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) { hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); - amd::HostQueue* queue; - - if (stream == nullptr) { - queue = hip::getNullStream(); - } else { - queue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* queue = hip::getQueue(stream); if (event == nullptr) { HIP_RETURN(hipErrorInvalidHandle); @@ -259,12 +252,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hipError_t hipStreamQuery(hipStream_t stream) { HIP_INIT_API(hipStreamQuery, stream); - amd::HostQueue* hostQueue; - if (stream == nullptr) { - hostQueue = hip::getNullStream(); - } else { - hostQueue = reinterpret_cast(stream)->asHostQueue(); - } + 
amd::HostQueue* hostQueue = hip::getQueue(stream); amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { @@ -284,8 +272,7 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - amd::HostQueue* hostQueue = reinterpret_cast - (stream)->asHostQueue(); + amd::HostQueue* hostQueue = reinterpret_cast(stream)->asHostQueue(); amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { amd::Command::EventWaitList eventWaitList; From 808dae6813a8fe161bf3cb9e0032e0cfd63b0b77 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 24 Apr 2020 15:30:28 -0400 Subject: [PATCH 121/132] Enable template max and min for HIP-Clang (#2028) It was for HCC only. HIP-Clang also needs it for __fp16 since AMDMIGraphX uses it. Change-Id: Id49322b7b89ef799accdf6b47627a6fce51d1ab5 --- include/hip/hcc_detail/math_functions.h | 13 +++++----- tests/src/deviceLib/hipMathFunctions.cpp | 32 +++++++++++++++++++++++- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index e146b1ae4f..11985c3242 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -1410,12 +1410,18 @@ float func(float x, int y) \ } __DEF_FLOAT_FUN2I(scalbn) -#if __HCC__ template __DEVICE__ inline static T min(T arg1, T arg2) { return (arg1 < arg2) ? arg1 : arg2; } +template +__DEVICE__ inline static T max(T arg1, T arg2) { + return (arg1 > arg2) ? 
arg1 : arg2; +} + +#if __HCC__ + __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) { return min(arg1, (uint32_t) arg2); } @@ -1437,11 +1443,6 @@ __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long lo return min((unsigned long long) arg1, arg2); }*/ -template -__DEVICE__ inline static T max(T arg1, T arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) { return max(arg1, (uint32_t) arg2); } diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index b1b0e8334a..4d313167e8 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ @@ -158,6 +158,34 @@ void check_abs_int64() { } +template +__global__ void kernel_simple(F f, T *out) { + *out = f(); +} + +template +void check_simple(F f, T expected, const char* file, unsigned line) { + auto memsize = sizeof(T); + T *outputCPU = (T *) malloc(memsize); + T *outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + if (*outputCPU != expected) { + failed("%s line %u : check failed (output = %lf, expected = %lf)\n", + file, line, (double)(*outputCPU), (double)expected); + } + hipFree(outputGPU); + free(outputCPU); +} +#define CHECK_SIMPLE(lambda, expected) \ + check_simple(lambda, expected, __FILE__, __LINE__); + +void test_fp16() { + CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f); + CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f); +} + int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, 
argv, true); @@ -165,5 +193,7 @@ int main(int argc, char* argv[]) { // check_lgamma_double(); + test_fp16(); + passed(); } From a0acf7bdaabdf75fe73b72c6e13404f8dc39cd9f Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Fri, 24 Apr 2020 22:18:23 -0400 Subject: [PATCH 122/132] [vdi] Revise the symbol management. - As different modules may have symbols with the same name, each symbol needs identifying with a pair of the module handle and the symbol name. Change-Id: I85650a787d9a424545154cc40ebd59e706fa358f --- vdi/hip_internal.hpp | 11 +++++++---- vdi/hip_memory.cpp | 20 ++++++++++++-------- vdi/hip_platform.cpp | 33 +++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 18521c4890..529ca7abdb 100644 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -265,8 +265,11 @@ private: std::unordered_map functions_; std::unordered_multimap vars_; - // Map from the host shadow symbol to its device name. - std::unordered_map symbols_; + // Map from the host shadow symbol to its device name. As different modules + // may have symbols with the same name, each symbol is uniquely identified by a pair of + // module handle and its name. 
+ std::unordered_map> symbols_; static PlatformState* platform_; @@ -286,9 +289,9 @@ public: std::vector< std::pair >* unregisterVar(hipModule_t hmod); - bool findSymbol(const void *hostVar, std::string &devName); + bool findSymbol(const void *hostVar, hipModule_t &hmod, std::string &devName); PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); - void registerVarSym(const void *hostVar, const char *symbolName); + void registerVarSym(const void *hostVar, hipModule_t hmod, const char *symbolName); void registerVar(const char* symbolName, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 2b3daab894..967952f03f 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -716,13 +716,14 @@ hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { DevLogPrintfError("cannot find symbol 0x%x \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { DevLogPrintfError("Cannot get global var: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); @@ -748,13 +749,14 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) 
{ DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); @@ -780,13 +782,14 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t co size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); @@ -812,13 +815,14 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { DevLogPrintfError("Cannot find 
symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index c5a9099bf5..217d152688 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -268,19 +268,21 @@ PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int device return dvar; } -bool PlatformState::findSymbol(const void *hostVar, std::string &symbolName) { +bool PlatformState::findSymbol(const void *hostVar, + hipModule_t &hmod, std::string &symbolName) { auto it = symbols_.find(hostVar); if (it != symbols_.end()) { - symbolName = it->second; + hmod = it->second.first; + symbolName = it->second.second; return true; } DevLogPrintfError("Could not find the Symbol: %s \n", symbolName.c_str()); return false; } -void PlatformState::registerVarSym(const void *hostVar, const char *symbolName) { +void PlatformState::registerVarSym(const void* hostVar, hipModule_t hmod, const char* symbolName) { amd::ScopedLock lock(lock_); - symbols_.insert(std::make_pair(hostVar, std::string(symbolName))); + symbols_.insert(std::make_pair(hostVar, std::make_pair(hmod, std::string(symbolName)))); } void PlatformState::registerVar(const char* hostvar, @@ -494,7 +496,7 @@ bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureRefe dvar->shadowAllocated = true; } *texRef = reinterpret_cast(dvar->shadowVptr); - registerVarSym(dvar->shadowVptr, hostVar); + registerVarSym(dvar->shadowVptr, hmod, hostVar); return true; } @@ -541,12 +543,13 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t bool PlatformState::getGlobalVarFromSymbol(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(hostVar, symbolName)) { + if (!PlatformState::instance().findSymbol(hostVar, hmod, symbolName)) { return false; } return PlatformState::instance().getGlobalVar(symbolName.c_str(), - 
ihipGetDevice(), nullptr, + ihipGetDevice(), hmod, dev_ptr, size_ptr); } @@ -615,7 +618,7 @@ extern "C" void __hipRegisterVar( /*norm*/ 0}; PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, deviceVar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); } extern "C" void __hipRegisterSurface(std::vector>* @@ -634,7 +637,7 @@ extern "C" void __hipRegisterSurface(std::vector>* type, /*norm*/ 0}; PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, deviceVar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); } extern "C" void __hipRegisterTexture(std::vector>* @@ -653,7 +656,7 @@ extern "C" void __hipRegisterTexture(std::vector>* type, norm}; PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, deviceVar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); } extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) @@ -760,13 +763,14 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } size_t size = 0; - if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, devPtr, &size)) { DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); @@ -778,13 +782,14 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { hipError_t 
hipGetSymbolSize(size_t* sizePtr, const void* symbol) { HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } hipDeviceptr_t devPtr = nullptr; - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &devPtr, sizePtr)) { DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); From 1f3789d5a232cb1580a19f6d1c734ff5d2858f03 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Fri, 24 Apr 2020 18:33:26 -0400 Subject: [PATCH 123/132] SWDEV-232008 - Handle cases where Memcpy size is less than Symbol Size. Change-Id: I046bc70445dfd4f6fdfd06415f53d1fc06f425b4 --- vdi/hip_memory.cpp | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 967952f03f..bb28106db2 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -709,9 +709,9 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { }; -hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(hipMemcpyToSymbol, symbol, src, count, offset, kind); + HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -730,21 +730,21 @@ hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { - 
DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", - offset, count, sym_size); + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); + HIP_RETURN(hipMemcpy(device_ptr, src, sizeBytes, kind)); } -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, count, offset, kind); + HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -763,21 +763,21 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { - DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", - offset, count, sym_size); + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); + HIP_RETURN(hipMemcpy(dst, device_ptr, sizeBytes, kind)); } -hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t count, +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, sizeBytes, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -796,21 +796,21 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t co } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { - DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", - offset, count, sym_size); + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); + HIP_RETURN(hipMemcpyAsync(device_ptr, src, sizeBytes, kind, stream)); } -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(hipMemcpyFromSymbolAsync, symbol, dst, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyFromSymbolAsync, symbol, dst, sizeBytes, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; @@ -829,16 +829,16 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { - DevLogPrintfError("Size does not match, offset: %u count: %u sym_size: %u \n", - offset, count, sym_size); + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, sizeBytes, kind, stream)); } hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, From 9c5a7226b2a6c84ef15e7111a044d4945f5c0dff Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 31 Mar 2020 20:30:11 +0000 Subject: [PATCH 124/132] Let hipcc treats .cpp files as C++ files for HIP-Clang This change is required by rocBLAS and rocFFT for OpenMP issue since cmake needs to treat .cpp file as C++ program to detect OpenMP support. This is to match nvcc behavior, speed up compilation of C++ programs, and fix some compilation issue where C++ programs are compiled as HIP programs. Currently it is controlled by an environment variable HIP_COMPILE_CXX_AS_HIP. By default it is 1, where hipcc treats .cpp files as HIP programs. If it is set to 0, hipcc will treat .cpp files as C++ programs. This is because some math libraries are still not ready for the change, however rocBLAS and rocFFT require this feature for OpenMP, therefore put it under an environment variable so that rocBLAS and rocFFT can use it. 
Change-Id: I56a51e27079df850ee39d4217fb647c22d79f612 --- bin/hipcc | 68 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 2db9fab0b7..8f36faa9aa 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -85,6 +85,7 @@ $HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'}; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'}; $DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'}; $HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibility mode +$HIP_COMPILE_CXX_AS_HIP=$ENV{'HIP_COMPILE_CXX_AS_HIP'} // "1"; if (defined $HIP_VDI_HOME) { $HIP_INFO_PATH= "$HIP_VDI_HOME/lib/.hipInfo"; @@ -222,9 +223,9 @@ if ($HIP_PLATFORM eq "clang") { $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; $HIPCFLAGS .= " -isystem $HSA_PATH/include"; - if (!($HIP_RUNTIME eq "HCC")) { - $HIPCXXFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; - $HIPCFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; + if ($HIP_RUNTIME ne "HCC" ) { + $HIPCXXFLAGS .= " -D__HIP_VDI__"; + $HIPCFLAGS .= " -D__HIP_VDI__"; } } elsif ($HIP_PLATFORM eq "hcc") { @@ -344,6 +345,7 @@ my $needLDFLAGS = 1; # need to add LDFLAGS to compile step. my $hasC = 0; # options contain a c-style file my $hasCXX = 0; # options contain a cpp-style file (NVCC must force recognition as GPU file) my $hasCU = 0; # options contain a cu-style file (HCC must force recognition as GPU file) +my $hasHIP = 0; # options contain a hip-style file (HIP-Clang must pass offloading options) my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. 
(some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version my $runCmd = 1; @@ -355,6 +357,7 @@ if(defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") { $coFormatv3 = 0; } my $funcSupp = 0; # enable function support +my $rdc = 0; # whether -fgpu-rdc is on my @options = (); my @inputs = (); @@ -404,6 +407,7 @@ my $optArg = ""; # -O args my $targetOpt = '--amdgpu-target='; my $targetsStr = ""; my $skipOutputFile = 0; # file followed by -o should not contibute in picking compiler flags +my $prevArg = ""; # previous argument foreach $arg (@ARGV) { @@ -417,6 +421,7 @@ foreach $arg (@ARGV) if ($skipOutputFile) { $toolArgs .= " $arg"; + $prevArg = $arg; $skipOutputFile = 0; next; } @@ -603,8 +608,15 @@ foreach $arg (@ARGV) $toolArgs = substr $toolArgs, 0, -8; chomp $toolArgs; } + } elsif ($arg eq 'hip' and $prevArg eq '-x') { + $hasHIP = 1; } elsif ($arg =~ m/^-/) { # options start with - + if ($arg eq '-fgpu-rdc') { + $rdc = 1; + } elsif ($arg eq '-fno-gpu-rdc') { + $rdc = 0; + } # Process HIPCC options here: if ($arg =~ m/^--hipcc/) { @@ -621,7 +633,7 @@ foreach $arg (@ARGV) push (@options, $arg); } #print "O: <$arg>\n"; - } else { + } elsif ($prevArg ne '-o') { # input files and libraries if ($arg =~ /\.c$/) { $hasC = 1; @@ -629,24 +641,28 @@ foreach $arg (@ARGV) $toolArgs .= " -x c" } elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { - $hasCXX = 1; $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang' and not $arg =~ /\.c$/) { - $toolArgs .= " -x hip" + if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") { + $hasCXX = 1; + } else { + $hasHIP = 1; + $toolArgs .= " -x hip"; } } - elsif (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { - $hasCU = 1; + elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) { $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang') { - $toolArgs .= " -x hip" + if ($HIP_COMPILER eq "clang") { + $hasHIP = 1; + $toolArgs .= " 
-x hip"; + } else { + $hasCU = 1; } } - push (@inputs, $arg); #print "I: <$arg>\n"; } $toolArgs .= " $arg" unless $swallowArg; + $prevArg = $arg; } if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ @@ -682,7 +698,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ $GPU_ARCH_ARG = $GPU_ARCH_OPT . $val; $HIPLDARCHFLAGS .= $GPU_ARCH_ARG; $HIPCXXFLAGS .= $archMacro; - if ($HIP_PLATFORM eq 'clang') { + if ($HIP_PLATFORM eq 'clang' and $hasHIP) { $HIPCXXFLAGS .= $GPU_ARCH_ARG; } @@ -724,12 +740,9 @@ if ($buildDeps and $HIP_PLATFORM eq 'clang') { $HIPCXXFLAGS .= " --cuda-host-only"; } -# Add --hip-link only if there are no source files. -if (!$needCXXFLAGS and !$needCFLAGS and $HIP_PLATFORM eq 'clang') { +# Add --hip-link only if it is not compile only and -fgpu-rdc is on. +if ($rdc and !$compileOnly and $HIP_PLATFORM eq 'clang') { $HIPLDFLAGS .= " --hip-link"; -} - -if (!$needCFLAGS and $HIP_PLATFORM eq 'clang') { $HIPLDFLAGS .= $HIPLDARCHFLAGS; } @@ -760,14 +773,19 @@ if ($HIP_PLATFORM eq "clang") { } # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. # ToDo : remove restriction for Windows after clang bug is fixed. 
- if (!$funcSupp and $optArg ne "-O0" and not $isWindows) { + if (!$funcSupp and $optArg ne "-O0" and not $isWindows and $hasHIP) { $HIPCXXFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; if ($needLDFLAGS and not $needCXXFLAGS) { $HIPLDFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; } } $HIP_DEVLIB_FLAGS = " --hip-device-lib-path=$DEVICE_LIB_PATH"; - $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; + if ($hasHIP) { + $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; + if ($HIP_RUNTIME ne "HCC") { + $HIPCXXFLAGS .= " -fhip-new-launch-api"; + } + } if (not $isWindows) { $HIPLDFLAGS .= " -lgcc_s -lgcc -lpthread -lm"; } @@ -781,7 +799,6 @@ if ($HIP_PLATFORM eq "clang") { } } - if ($HIPCC_COMPILE_FLAGS_APPEND) { $HIPCXXFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; $HIPCFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; @@ -791,15 +808,18 @@ if ($HIPCC_LINK_FLAGS_APPEND) { } my $CMD="$HIPCC"; -if ($needLDFLAGS and not $compileOnly) { - $CMD .= " $HIPLDFLAGS"; -} + if ($needCFLAGS) { $CMD .= " $HIPCFLAGS"; } + if ($needCXXFLAGS) { $CMD .= " $HIPCXXFLAGS"; } + +if ($needLDFLAGS and not $compileOnly) { + $CMD .= " $HIPLDFLAGS"; +} $CMD .= " $toolArgs"; if ($verbose & 0x1) { From afc9b546fb8f6f36ab4004f02253ee6690559a7f Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 24 Apr 2020 15:59:33 +0000 Subject: [PATCH 125/132] Update hipconfig to support HIP-Clang Add support for hipconfig to display details on HIP-Clang compiler and update HIP_COMPILER and HIP_RUNTIME to include clang and rocclr. Also, add hipcc flags --cxxflags and --ldflags to support HCC users who used hccconfig flags. 
Change-Id: Ib12d81a4ff59d34fb000626836b1adb10be3ac61 --- bin/hipcc | 16 +++++++++++ bin/hipconfig | 76 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 15 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index 8f36faa9aa..76d87c0c0b 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -348,6 +348,8 @@ my $hasCU = 0; # options contain a cu-style file (HCC must force recogni my $hasHIP = 0; # options contain a hip-style file (HIP-Clang must pass offloading options) my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. (some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version +my $printCXXFlags = 0; # print HIPCXXFLAGS +my $printLDFlags = 0; # print HIPLDFLAGS my $runCmd = 1; my $buildDeps = 0; my $linkType = 1; @@ -473,6 +475,14 @@ foreach $arg (@ARGV) $printHipVersion = 1; $runCmd = 0; } + if($trimarg eq '--cxxflags') { + $printCXXFlags = 1; + $runCmd = 0; + } + if($trimarg eq '--ldflags') { + $printLDFlags = 1; + $runCmd = 0; + } if($trimarg eq '-M') { $compileOnly = 1; $buildDeps = 1; @@ -832,6 +842,12 @@ if ($printHipVersion) { } print $HIP_VERSION, "\n"; } +if ($printCXXFlags) { + print $HIPCXXFLAGS; +} +if ($printLDFlags) { + print $HIPLDFLAGS; +} if ($runCmd) { if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) { print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using $HCC_HOME/hcc with version $HCC_VERSION from hipcc. 
Please rebuild HIP including cmake or update HCC_HOME variable.\n") ; diff --git a/bin/hipconfig b/bin/hipconfig index 033908d2d3..2dcc81fa76 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -19,6 +19,7 @@ GetOptions( ,"path|p" => \$p_path ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform + ,"runtime|r" => \$p_runtime ,"cpp_config|cxx_config|C" => \$p_cpp_config ,"full|f|info" => \$p_full, ,"version|v" => \$p_version, @@ -30,8 +31,9 @@ if ($p_help) { print "usage: hipconfig [OPTIONS]\n"; print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n"; print " --cpp_config, -C : print C++ compiler options\n"; - print " --compiler, -c : print compiler (hcc or nvcc)\n"; + print " --compiler, -c : print compiler (hcc or clang or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; + print " --runtime, -r : print runtime (HCC or VDI)\n"; print " --full, -f : print full config\n"; print " --version, -v : print hip version\n"; print " --check : check configuration\n"; @@ -82,13 +84,22 @@ if (-e "$HIP_PATH/../.info/version") { $CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda'; $HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc"; $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; +$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin"; #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: $HIP_PLATFORM=$ENV{'HIP_PLATFORM'}; +# Read .hipInfo +my %hipInfo = (); +parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipInfo); +$HIP_COMPILER = $hipInfo{'HIP_COMPILER'} // "hcc"; +$HIP_RUNTIME = $hipInfo{'HIP_RUNTIME'} // "HCC"; + if (not defined $HIP_PLATFORM) { if (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) { $HIP_PLATFORM = "hcc"; + } elsif (can_run("$HIP_CLANG_PATH/clang++") or can_run("clang++")) { + $HIP_PLATFORM = "hcc"; } elsif (can_run("$CUDA_PATH/bin/nvcc") or can_run("nvcc")) { $HIP_PLATFORM = "nvcc"; } else { @@ -97,8 +108,15 @@ if (not defined $HIP_PLATFORM) { } } -if ($HIP_PLATFORM eq "hcc") { 
- $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; +if ($HIP_COMPILER eq "hcc") { + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; +} +if ($HIP_COMPILER eq "clang") { + $HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`; + $HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/; + $HIP_CLANG_VERSION=$1; + + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include"; } if ($HIP_PLATFORM eq "nvcc") { $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; @@ -118,18 +136,26 @@ if ($p_path) { $printed = 1; } - if ($p_cpp_config) { print $CPP_CONFIG; $printed = 1; } +if ($p_compiler) { + print $HIP_COMPILER; + $printed = 1; +} -if ($p_compiler or $p_platform) { +if ($p_platform) { print $HIP_PLATFORM; $printed = 1; } +if ($p_runtime) { + print $HIP_RUNTIME; + $printed = 1; +} + if ($p_version) { print $HIP_VERSION; $printed = 1; @@ -139,21 +165,41 @@ if (!$printed or $p_full) { print "HIP version : ", $HIP_VERSION, "\n\n"; print "== hipconfig\n"; print "HIP_PATH : ", $HIP_PATH, "\n"; + print "HIP_COMPILER : ", $HIP_COMPILER, "\n"; print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n"; + print "HIP_RUNTIME : ", $HIP_RUNTIME, "\n"; print "CPP_CONFIG : ", $CPP_CONFIG, "\n"; if ($HIP_PLATFORM eq "hcc") { print "\n" ; - print "== hcc\n"; - print ("HSA_PATH : $HSA_PATH\n"); - print ("HCC_HOME : $HCC_HOME\n"); - system("$HCC_HOME/bin/hcc --version"); - system("$HCC_HOME/bin/llc --version"); - print ("HCC-cxxflags : "); - system("$HCC_HOME/bin/hcc-config --cxxflags"); - print ("HCC-ldflags : "); - system("$HCC_HOME/bin/hcc-config --ldflags"); - printf("\n"); + if ($HIP_COMPILER eq "hcc") + { + print "== hcc\n"; + print ("HSA_PATH : $HSA_PATH\n"); + print ("HCC_HOME : $HCC_HOME\n"); + system("$HCC_HOME/bin/hcc --version"); + system("$HCC_HOME/bin/llc --version"); + print ("HCC-cxxflags : 
"); + system("$HCC_HOME/bin/hcc-config --cxxflags"); + printf("\n"); + print ("HCC-ldflags : "); + system("$HCC_HOME/bin/hcc-config --ldflags"); + printf("\n"); + } + if ($HIP_COMPILER eq "clang") + { + print "== hip-clang\n"; + print ("HSA_PATH : $HSA_PATH\n"); + print ("HIP_CLANG_PATH : $HIP_CLANG_PATH\n"); + system("$HIP_CLANG_PATH/clang++ --version"); + system("$HIP_CLANG_PATH/llc --version"); + print ("hip-clang-cxxflags : "); + system("$HIP_PATH/bin/hipcc --cxxflags"); + printf("\n"); + print ("hip-clang-ldflags : "); + system("$HIP_PATH/bin/hipcc --ldflags"); + printf("\n"); + } } if ($HIP_PLATFORM eq "nvcc") { print "\n" ; From b75b602601c59b3658a4ed49d5d26b2b3055d74e Mon Sep 17 00:00:00 2001 From: Freddy Paul Date: Mon, 6 Apr 2020 21:35:43 -0700 Subject: [PATCH 126/132] hip-on-vdi: Enable RUNPATH and versioning 1. Enable versioning for HIP libraries 2. Enable RUNPATH on HIP libraries to find ROCm dependencies 3. Control HIP package install using CPACK_INSTALL_PREFIX Change-Id: I9096da56592e9c78cdba329011f6e1bae99adaa2 --- CMakeLists.txt | 29 +++++++++++++++-------------- packaging/hip-vdi.txt | 4 +++- vdi/CMakeLists.txt | 7 ++++++- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 19f22fa650..b8e2c6947b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,21 @@ add_to_config(_versionInfo HIP_VERSION_MAJOR) add_to_config(_versionInfo HIP_VERSION_MINOR) add_to_config(_versionInfo HIP_VERSION_PATCH) +set (HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR}) +set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR}) +if (${ROCM_PATCH_VERSION} ) + set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION}) +else () + set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH}) +endif () +set (HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}") + +if (DEFINED ENV{ROCM_RPATH}) + set (CMAKE_INSTALL_RPATH "$ENV{ROCM_RPATH}") + set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) + set 
(CMAKE_SKIP_BUILD_RPATH TRUE) +endif () + if(CMAKE_CXX_COMPILER MATCHES ".*hcc") set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler") set(HIP_PLATFORM "hcc" CACHE STRING "HIP Platform") @@ -333,20 +348,6 @@ if(HIP_PLATFORM STREQUAL "hcc") src/env.cpp src/h2f.cpp) - set ( HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR} ) - set ( HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR} ) - if ( ${ROCM_PATCH_VERSION} ) - set ( HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION}) - else () - set ( HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH} ) - endif () - set ( HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}" ) - - if ( DEFINED ENV{ROCM_RPATH} ) - set (CMAKE_INSTALL_RPATH "$ENV{ROCM_RPATH}" ) - set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE ) - set (CMAKE_SKIP_BUILD_RPATH TRUE ) - endif () add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME}) add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME}) diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index 2bbe4331d0..a1f33e9920 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 2.8.3) project(hip_vdi) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so.@HIP_LIB_VERSION_MAJOR@ DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so.@HIP_LIB_VERSION_STRING@ DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) @@ -13,7 +15,7 @@ install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-con # Packaging steps ############################# set(CPACK_SET_DESTDIR TRUE) -set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") +set(CPACK_INSTALL_PREFIX @CPACK_INSTALL_PREFIX@) ## cmake generated target files contains IMPORTED_LOCATION_RELEASE etc. 
which ## is installation path when building the project, which may be different from diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index bfeff521d0..5460b2ba6b 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -1,7 +1,7 @@ #project("hip") cmake_minimum_required(VERSION 3.5.1) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") +set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") @@ -156,6 +156,11 @@ add_library(amdhip64_static STATIC $ ) +set_target_properties( + amdhip64 PROPERTIES + VERSION ${HIP_LIB_VERSION_STRING} + SOVERSION ${HIP_LIB_VERSION_MAJOR} +) # We expect amdhip64_static to contain objects of vdi and hip. But linker # let amdhip64_static contain objects of hip only. 
So we will use a # a custom amdhip64_static_combiner to combine objects of vid and hip into From cf52b5fca44f2e7ca4b79d1c8ec67be9df2f79d4 Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Mon, 27 Apr 2020 16:57:49 -0400 Subject: [PATCH 127/132] Handle copy cases where the uses pases a bad ptr Change-Id: I4490b8519e4d0dde25b845f9ca7e81c1d80d4f28 --- vdi/hip_memory.cpp | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index bb28106db2..d4be73496d 100755 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -911,13 +911,13 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, hipStream_t stream, bool isAsync = false) { cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if (is_valid(srcMemObj) == false) { + size_t dstOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); + if (!is_valid(srcMemObj) || (dstMemory == nullptr)) { return hipErrorInvalidValue; } amd::Image* srcImage = as_amd(srcMemObj)->asImage(); - size_t dstOffset = 0; - amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcImage->getRowPitch(), srcImage->getSlicePitch())) { @@ -970,13 +970,13 @@ hipError_t ihipMemcpyDtoA(void* srcDevice, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (is_valid(dstMemObj) == false) { + if ((srcMemory == nullptr) || !is_valid(dstMemObj)) { return hipErrorInvalidValue; } - size_t srcOffset = 0; - amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); amd::Image* dstImage = as_amd(dstMemObj)->asImage(); amd::BufferRect srcRect; @@ -1037,6 +1037,10 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, size_t dstOffset = 0; amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); 
+ if ((srcMemory == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1101,6 +1105,10 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, size_t srcOffset = 0; amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + if ((srcMemory == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1156,6 +1164,10 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, size_t dstOffset = 0; amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); + if ((srcHost == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1206,6 +1218,10 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch) { + if ((srcHost == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1287,12 +1303,8 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { - if (srcHost == nullptr) { - return hipErrorInvalidValue; - } - cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (is_valid(dstMemObj) == false) { + if ((srcHost == nullptr) || !is_valid(dstMemObj)) { return hipErrorInvalidValue; } @@ -1344,11 +1356,7 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, hipStream_t stream, bool isAsync = false) { cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if 
(!is_valid(srcMemObj)) { - return hipErrorInvalidValue; - } - - if (dstHost == nullptr) { + if (!is_valid(srcMemObj) || (dstHost == nullptr)) { return hipErrorInvalidValue; } @@ -1357,7 +1365,6 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, return hipErrorInvalidValue; } - amd::Image* srcImage = as_amd(srcMemObj)->asImage(); // HIP assumes the width is in bytes, but OCL assumes it's in pixels. const size_t elementSize = srcImage->getImageFormat().getElementSize(); From db3d4bc84856fc960b68ba6ddfedc658e5c8c109 Mon Sep 17 00:00:00 2001 From: Payam Date: Wed, 15 Apr 2020 18:49:08 -0400 Subject: [PATCH 128/132] updating package name to hip-rocclr instead of hip-vdi Change-Id: Ife8810d4dd224ce29c2e2a827b221a618c9f1b3d --- packaging/hip-samples.txt | 2 +- packaging/hip-vdi.txt | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packaging/hip-samples.txt b/packaging/hip-samples.txt index 1f3e088153..6f643c3865 100644 --- a/packaging/hip-samples.txt +++ b/packaging/hip-samples.txt @@ -26,7 +26,7 @@ set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) -set(CPACK_RPM_PACKAGE_REQUIRES "hip-vdi = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_REQUIRES "hip-rocclr = ${HIP_BASE_VERSION}") set(CPACK_RPM_PACKAGE_OBSOLETES "hip_samples") set(CPACK_RPM_PACKAGE_CONFLICTS "hip_samples") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index a1f33e9920..04eb892cc8 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 2.8.3) -project(hip_vdi) +project(hip_rocclr) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so.@HIP_LIB_VERSION_MAJOR@ DESTINATION lib) @@ -27,9 +27,9 @@ foreach(_target_file ${_target_files}) 
endforeach() install(FILES ${_target_files} DESTINATION lib/cmake/hip) -set(CPACK_PACKAGE_NAME "hip-vdi") -set(HCC_PACKAGE_NAME "vdi") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]") +set(CPACK_PACKAGE_NAME "hip-rocclr") +set(HCC_PACKAGE_NAME "rocclr") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [ROCClr]") set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") set(CPACK_PACKAGE_CONTACT "Maneesh Gupta ") set(CPACK_PACKAGE_VERSION @HIP_VERSION_MAJOR@.@HIP_VERSION_MINOR@.@HIP_VERSION_PATCH@) @@ -41,9 +41,9 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_rocclr, hip-hcc (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_rocclr") +set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_rocclr") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") @@ -51,9 +51,9 @@ set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu") -set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}") -set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi") -set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_RPM_PACKAGE_PROVIDES "hip_rocclr, 
hip-hcc = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_OBSOLETES "hip_rocclr") +set(CPACK_RPM_PACKAGE_CONFLICTS "hip_rocclr") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") set(CPACK_SOURCE_GENERATOR "TGZ") include(CPack) From ded92a5d2a2f89506e27c6b1d83151ce77a8a66e Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Tue, 28 Apr 2020 16:42:13 -0400 Subject: [PATCH 129/132] Fix order initialization warning Change-Id: I77ddf1dbec2d11c273163efae6526d3deca92dbf --- vdi/hip_internal.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index 529ca7abdb..c0ced2e8a3 100644 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -109,11 +109,11 @@ namespace hip { amd::Monitor lock_{"Device lock"}; /// VDI context amd::Context* context_; - /// VDI host queue for default streams - Stream null_stream_; /// Device's ID /// Store it here so we don't have to loop through the device list every time int deviceId_; + /// VDI host queue for default streams + Stream null_stream_; //Maintain list of user enabled peers std::list userEnabledPeers; From deb2c399c9d573fb2d70a7c84dc6e6604afe2172 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 29 Apr 2020 02:11:37 -0400 Subject: [PATCH 130/132] Clean-up the list of blocking streams - Insert the stream into the list on the host queue creation, instead of stream creation Change-Id: Ib25053019f7df97e5bc786922a6587b9514852d3 --- vdi/hip_event.cpp | 6 +-- vdi/hip_internal.hpp | 6 +-- vdi/hip_stream.cpp | 98 ++++++++++++++++++++++++++------------------ 3 files changed, 63 insertions(+), 47 deletions(-) diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp index 0cd061c1e8..26191ec380 100644 --- a/vdi/hip_event.cpp +++ b/vdi/hip_event.cpp @@ -217,18 +217,14 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_RETURN(hipErrorInvalidHandle); } - hip::Event* e = reinterpret_cast(event); - - hip::Stream* s = reinterpret_cast(stream); 
amd::HostQueue* queue = hip::getQueue(stream); - amd::Command* command = queue->getLastQueuedCommand(true); - if (command == nullptr) { command = new amd::Marker(*queue, false); command->enqueue(); } + hip::Event* e = reinterpret_cast(event); e->addMarker(queue, command); HIP_RETURN(hipSuccess); diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index c0ced2e8a3..3d7c5249a8 100644 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -90,10 +90,10 @@ namespace hip { public: Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f = 0, bool null_stream = false); - bool create(); + bool Create(); amd::HostQueue* asHostQueue(); - void destroy(); - void finish() const; + void Destroy(); + void Finish() const; /// Get device ID associated with the current stream; int DeviceId() const; /// Returns if stream is null stream diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index b2838c0164..fbcd223ed2 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -42,51 +42,71 @@ class StreamCallback { namespace hip { +// ================================================================================================ Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f, bool null_stream) : queue_(nullptr), lock_("Stream Callback lock"), device_(dev), priority_(p), flags_(f), null_(null_stream) {} -bool Stream::create() { +// ================================================================================================ +bool Stream::Create() { cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; queue_ = new amd::HostQueue(*device_->asContext(), *device_->devices()[0], properties, amd::CommandQueue::RealTimeDisabled, priority_); - assert(queue_ != nullptr); - return queue_->create(); -} - -amd::HostQueue* Stream::asHostQueue() { - if (queue_ == nullptr) { - if (!create()) { - return nullptr; - } else if (Null()) { - // Make sure the null stream is inserted into the list of default/blocking streams + // Create a 
host queue + bool result = (queue_ != nullptr) ? queue_->create() : false; + // Insert just created stream into the list of the blocking queues + if (result) { + if (!(flags_ & hipStreamNonBlocking)) { amd::ScopedLock lock(streamSetLock); streamSet.insert(this); } + } else { + Destroy(); + } + return result; +} + +// ================================================================================================ +amd::HostQueue* Stream::asHostQueue() { + // Access to the stream object is lock protected, because possible allocation + amd::ScopedLock l(Lock()); + if (queue_ == nullptr) { + // Create the host queue for the first time + if (!Create()) { + return nullptr; + } } return queue_; } -void Stream::destroy() { +// ================================================================================================ +void Stream::Destroy() { if (queue_ != nullptr) { queue_->release(); queue_ = nullptr; + + amd::ScopedLock lock(streamSetLock); + streamSet.erase(this); } + delete this; } -void Stream::finish() const { +// ================================================================================================ +void Stream::Finish() const { if (queue_ != nullptr) { queue_->finish(); } } +// ================================================================================================ int Stream::DeviceId() const { return device_->deviceId(); } }; +// ================================================================================================ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) { amd::Command::EventWaitList eventWaitList; { @@ -95,12 +115,12 @@ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream for (const auto& stream : streamSet) { amd::HostQueue* active_queue = stream->asHostQueue(); // If it's the current device - if ((active_queue != nullptr) && (&active_queue->device() == &blocking_queue->device()) && + if ((&active_queue->device() == &blocking_queue->device()) && // and it's 
not the current stream (active_queue != blocking_queue) && // check for a wait on the null stream (stream->Null() == wait_null_stream)) { - // Get the last valid so command + // Get the last valid command amd::Command* command = active_queue->getLastQueuedCommand(true); if ((command != nullptr) && // Check the current active status @@ -126,6 +146,7 @@ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream } } +// ================================================================================================ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { hipError_t status = hipSuccess; StreamCallback* cbo = reinterpret_cast(user_data); @@ -137,18 +158,15 @@ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, delete cbo; } -static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { +// ================================================================================================ +static hipError_t ihipStreamCreate(hipStream_t* stream, + unsigned int flags, amd::CommandQueue::Priority priority) { hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags); if (hStream == nullptr) { return hipErrorOutOfMemory; } - if (!(flags & hipStreamNonBlocking)) { - amd::ScopedLock lock(streamSetLock); - streamSet.insert(hStream); - } - *stream = reinterpret_cast(hStream); ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); @@ -156,18 +174,21 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: return hipSuccess; } +// ================================================================================================ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); } +// 
================================================================================================ hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(hipStreamCreate, stream); HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); } +// ================================================================================================ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); @@ -180,6 +201,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); } +// ================================================================================================ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); @@ -193,13 +215,12 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio return HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { HIP_INIT_API(hipStreamGetFlags, stream, flags); - hip::Stream* hStream = reinterpret_cast(stream); - - if (flags != nullptr && hStream != nullptr) { - *flags = hStream->Flags(); + if ((flags != nullptr) && (stream != nullptr)) { + *flags = reinterpret_cast(stream)->Flags(); } else { HIP_RETURN(hipErrorInvalidValue); } @@ -207,15 +228,17 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(hipStreamSynchronize, stream); - amd::HostQueue* hostQueue = hip::getQueue(stream); - 
hostQueue->finish(); + // Wait for the current host queue + hip::getQueue(stream)->finish(); HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(hipStreamDestroy, stream); @@ -223,32 +246,27 @@ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_RETURN(hipErrorInvalidHandle); } - amd::ScopedLock lock(streamSetLock); - - hip::Stream* hStream = reinterpret_cast(stream); - - hStream->destroy(); - streamSet.erase(hStream); - - delete hStream; + reinterpret_cast(stream)->Destroy(); HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); - amd::HostQueue* queue = hip::getQueue(stream); - if (event == nullptr) { HIP_RETURN(hipErrorInvalidHandle); } + amd::HostQueue* queue = hip::getQueue(stream); + hip::Event* e = reinterpret_cast(event); - return HIP_RETURN(e->streamWait(queue, flags)); + HIP_RETURN(e->streamWait(queue, flags)); } +// ================================================================================================ hipError_t hipStreamQuery(hipStream_t stream) { HIP_INIT_API(hipStreamQuery, stream); @@ -256,6 +274,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { + // Nothing was submitted to the queue HIP_RETURN(hipSuccess); } @@ -268,11 +287,12 @@ hipError_t hipStreamQuery(hipStream_t stream) { HIP_RETURN(status); } +// ================================================================================================ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - 
amd::HostQueue* hostQueue = reinterpret_cast(stream)->asHostQueue(); + amd::HostQueue* hostQueue = hip::getQueue(stream); amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { amd::Command::EventWaitList eventWaitList; From 34b32e35c99d0082202db2b11e6a2be6db5417fd Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Tue, 28 Apr 2020 14:17:58 -0700 Subject: [PATCH 131/132] Fix error code when hipEventRecord is not called Change-Id: I6672d4320280fd87fd2786bc54214d40944da845 --- vdi/hip_event.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp index 26191ec380..f2738169a8 100644 --- a/vdi/hip_event.cpp +++ b/vdi/hip_event.cpp @@ -35,8 +35,9 @@ bool Event::ready() { hipError_t Event::query() { amd::ScopedLock lock(lock_); + // If event is not recorded, event_ is null, hence return hipSuccess if (event_ == nullptr) { - return hipErrorInvalidHandle; + return hipSuccess; } return ready() ? hipSuccess : hipErrorNotReady; @@ -45,8 +46,9 @@ hipError_t Event::query() { hipError_t Event::synchronize() { amd::ScopedLock lock(lock_); + // If event is not recorded, event_ is null, hence return hipSuccess if (event_ == nullptr) { - return hipErrorInvalidHandle; + return hipSuccess; } event_->awaitCompletion(); From f21fd3e22ed3411ca964d0185073b07881bbea1e Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Tue, 28 Apr 2020 22:41:45 -0700 Subject: [PATCH 132/132] Don't make a copy of the embedded device binaries Embedded device binaries should not be cloned if we want the ROCR's code object URI for this binary to point to the container (host executable or shared library) instead of a memory location. 
Depends-On: I7973bb0243f5a2d1b639b8a88445cfe6af919dd7 Change-Id: Id3afc981e027e5371114d011f2caaa6414f5fc58 --- vdi/hip_platform.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index f2436c4f17..f7cc1754f2 100755 --- a/vdi/hip_platform.cpp +++ b/vdi/hip_platform.cpp @@ -164,7 +164,8 @@ void PlatformState::digestFatBinary(const void* data, std::vectoraddDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + if (CL_SUCCESS == program->addDeviceProgram( + *ctx->devices()[0], code_objs[dev].first, code_objs[dev].second, false)) { programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); } }