From e9daf7624b2d61fef8634c8b43b016950d8df29f Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Tue, 23 Oct 2018 21:13:11 +0800 Subject: [PATCH 01/23] Make correct checking of the returned hipDeviceptr_t from read_global_description() [ROCm/hip commit: de5f47a98410e08761199be33134fc012e208353] --- projects/hip/src/hip_module.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index a6d486b6de..bb419740e3 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -342,7 +342,7 @@ hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, hi tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), it0->second.cend(), name); - return dptr ? hipSuccess : hipErrorNotFound; + return *dptr ? hipSuccess : hipErrorNotFound; } hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) { @@ -367,7 +367,7 @@ hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, c tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), it->second.cend(), name); - return dptr ? hipSuccess : hipErrorNotFound; + return *dptr ? hipSuccess : hipErrorNotFound; } hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) { From 3e91f67882636b56106a795b17a99ab136639eb6 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Thu, 25 Oct 2018 12:19:32 -0400 Subject: [PATCH 02/23] Adding checks and debug output for fat binary for hip-clang [ROCm/hip commit: 062398c72f81940c71f0f7728ebf58feaf790ef3] --- projects/hip/src/hip_clang.cpp | 53 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/projects/hip/src/hip_clang.cpp b/projects/hip/src/hip_clang.cpp index 15a96d298a..6067edd289 100644 --- a/projects/hip/src/hip_clang.cpp +++ b/projects/hip/src/hip_clang.cpp @@ -86,6 +86,7 @@ __hipRegisterFatBinary(const void* data) std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)], desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)}; + tprintf(DB_FB, "Found bundle for %s\n", target.c_str()); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { hsa_agent_t agent = g_allAgents[deviceId + 1]; @@ -110,10 +111,25 @@ __hipRegisterFatBinary(const void* data) if (module->executable.handle) { modules->at(deviceId) = module; + tprintf(DB_FB, "Loaded code object for %s\n", name); + } else { + fprintf(stderr, "Failed to load code object for %s\n", name); + abort(); } } } + for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { + hsa_agent_t agent = g_allAgents[deviceId + 1]; + + char name[64] = {}; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); + if (!(*modules)[deviceId]) { + fprintf(stderr, "No device code bundle for %s\n", name); + abort(); + } + } + tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules); return modules; } @@ -132,13 +148,18 @@ extern "C" void __hipRegisterFunction( dim3* gridDim, int* wSize) { + HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName); std::vector functions{g_deviceCnt}; + assert(modules && modules->size() >= g_deviceCnt); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { hipFunction_t function; if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) { functions[deviceId] = function; } + else { + tprintf(DB_FB, "missing kernel %s for device %d\n", deviceName, deviceId); + } } g_functions.insert(std::make_pair(hostFunction, std::move(functions))); @@ -180,6 +201,7 @@ hipError_t hipSetupArgument( size_t size, size_t offset) { + HIP_INIT_API(arg, size, offset); auto ctx = ihipGetTlsDefaultCtx(); LockedAccessor_CtxCrit_t crit(ctx->criticalData()); auto& arguments = crit->_execStack.top()._arguments; @@ -194,6 +216,7 @@ hipError_t hipSetupArgument( hipError_t hipLaunchByPtr(const void *hostFunction) { + HIP_INIT_API(hostFunction); ihipExec_t exec; { auto ctx = ihipGetTlsDefaultCtx(); @@ -213,20 +236,26 @@ hipError_t hipLaunchByPtr(const void *hostFunction) deviceId = 0; } + hipError_t e = hipSuccess; decltype(g_functions)::iterator it; - if ((it = g_functions.find(hostFunction)) == g_functions.end()) - return hipErrorUnknown; + if ((it = g_functions.find(hostFunction)) == g_functions.end()) { + e = hipErrorUnknown; + fprintf(stderr, "kernel %p not found!\n", hostFunction); + abort(); + } else { + size_t size = exec._arguments.size(); + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; - size_t size = exec._arguments.size(); - void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END - }; + e = hipModuleLaunchKernel(it->second[deviceId], + exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, + exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, + exec._sharedMem, exec._hStream, nullptr, extra); + } - return hipModuleLaunchKernel(it->second[deviceId], - exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, - exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, - exec._sharedMem, exec._hStream, nullptr, extra); + return ihipLogStatus(e); } From fa429022e10f02c6a3ff36f6ef76c5be34d7dc3e Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Fri, 26 Oct 2018 14:11:18 -0400 Subject: [PATCH 03/23] Add HIP_DUMP_CODE_OBJECT [ROCm/hip commit: f5d8842f6a74d7c01de88064b56b230ba46a597a] --- projects/hip/src/hip_clang.cpp | 11 +++++++++++ projects/hip/src/hip_hcc.cpp | 6 ++++++ projects/hip/src/hip_hcc_internal.h | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/projects/hip/src/hip_clang.cpp b/projects/hip/src/hip_clang.cpp index 6067edd289..cfd75df562 100644 --- a/projects/hip/src/hip_clang.cpp +++ b/projects/hip/src/hip_clang.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. #include #include +#include #include "hip/hip_runtime.h" #include "hip_hcc_internal.h" @@ -112,6 +113,16 @@ __hipRegisterFatBinary(const void* data) if (module->executable.handle) { modules->at(deviceId) = module; tprintf(DB_FB, "Loaded code object for %s\n", name); + if (HIP_DUMP_CODE_OBJECT) { + char fname[30]; + static std::atomic index; + sprintf(fname, "__hip_dump_code_object%04d.o", index++); + tprintf(DB_FB, "Dump code object %s\n", fname); + std::ofstream ofs; + ofs.open(fname, std::ios::binary); + ofs << image; + ofs.close(); + } } else { fprintf(stderr, "Failed to load code object for %s\n", name); abort(); diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index eff93da847..e152e7ba69 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1; int HIP_SYNC_STREAM_WAIT = 0; int HIP_FORCE_NULL_STREAM = 0; +int HIP_DUMP_CODE_OBJECT = 0; + #if (__hcc_workweek__ >= 17300) // Make sure we have required bug fix in HCC @@ -1294,6 +1296,10 @@ void HipReadEnv() { "overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag " "when creating the event."); + READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0, + "If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory," + "where nnnn is the index number."); + // Some flags have both compile-time and runtime flags - generate a warning if user enables the // runtime flag but the compile-time flag is disabled. if (HIP_DB && !COMPILE_HIP_DB) { diff --git a/projects/hip/src/hip_hcc_internal.h b/projects/hip/src/hip_hcc_internal.h index d64a4a4cbe..8102f066de 100644 --- a/projects/hip/src/hip_hcc_internal.h +++ b/projects/hip/src/hip_hcc_internal.h @@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM; extern int HIP_INIT_ALLOC; extern int HIP_FORCE_NULL_STREAM; +extern int HIP_DUMP_CODE_OBJECT; // TODO - remove when this is standard behavior. extern int HCC_OPT_FLUSH; - // Class to assign a short TID to each new thread, for HIP debugging purposes. class TidInfo { public: From 6d64f5e112c17e2490d9d78d99ef09e4c5b3eeb4 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 28 Oct 2018 17:01:00 +0000 Subject: [PATCH 04/23] Rely on code object metadat for kernarg arguments alignof and sizeof. [ROCm/hip commit: fe1e963299c1f4b63652e91a3f156d8a043d317b] --- .../hip/hcc_detail/functional_grid_launch.hpp | 37 +++++-- .../include/hip/hcc_detail/program_state.hpp | 2 + projects/hip/src/program_state.cpp | 102 +++++++++++++++++- 3 files changed, 132 insertions(+), 9 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index 66e5873f3a..3a19965974 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -33,6 +33,7 @@ THE SOFTWARE. #include #include +#include #include #include #include @@ -56,7 +57,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple&) { + const std::tuple&, + const std::vector>&, + std::vector kernarg) { return kernarg; } @@ -65,7 +68,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple& formals) { + const std::tuple& formals, + const std::vector>& size_align, + std::vector kernarg) { using T = typename std::tuple_element>::type; static_assert( @@ -80,24 +85,42 @@ inline std::vector make_kernarg( #endif kernarg.resize(round_up_to_next_multiple_nonnegative( - kernarg.size(), alignof(T)) + sizeof(T)); + kernarg.size(), size_align[n].second) + + size_align[n].first); - new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get(formals)}; + std::memcpy( + kernarg.data() + kernarg.size() - size_align[n].first, + &std::get(formals), + size_align[n].first); - return make_kernarg(std::move(kernarg), formals); + return make_kernarg(formals, size_align, std::move(kernarg)); } template inline std::vector make_kernarg( - void (*)(Formals...), std::tuple actuals) { + void (*kernel)(Formals...), std::tuple actuals) { static_assert(sizeof...(Formals) == sizeof...(Actuals), "The count of formal arguments must match the count of actuals."); + const auto it = function_names().find( + reinterpret_cast(kernel)); + + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } + + const auto it1 = kernargs().find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } + std::tuple to_formals{std::move(actuals)}; std::vector kernarg; kernarg.reserve(sizeof(to_formals)); - return make_kernarg<0>(std::move(kernarg), to_formals); + return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); } void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, diff --git a/projects/hip/include/hip/hcc_detail/program_state.hpp b/projects/hip/include/hip/hcc_detail/program_state.hpp index bdb87b3509..92bef22172 100644 --- a/projects/hip/include/hip/hcc_detail/program_state.hpp +++ b/projects/hip/include/hip/hcc_detail/program_state.hpp @@ -99,6 +99,8 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); +std::unordered_map< + std::string, std::vector>>& kernargs(); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 8766134582..43ceedee7b 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -306,8 +306,8 @@ const unordered_map>& kernels(bool rebui void load_code_object_and_freeze_executable( const string& file, hsa_agent_t agent, - hsa_executable_t - executable) { // TODO: the following sequence is inefficient, should be refactored + hsa_executable_t executable) { + // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. static const auto cor_deleter = [](hsa_code_object_reader_t* p) { @@ -334,6 +334,85 @@ void load_code_object_and_freeze_executable( code_readers.push_back(move(tmp)); } } + +size_t parse_args( + const string& metadata, + size_t f, + size_t l, + vector>& size_align) { + if (f == l) return f; + + do { + static constexpr size_t size_sz{5}; + f = metadata.find("Size:", f) + size_sz; + + if (l <= f) return f; + + auto size = strtoul(&metadata[f], nullptr, 10); + + static constexpr size_t align_sz{6}; + f = metadata.find("Align:", f) + align_sz; + + char* l{}; + auto align = strtoul(&metadata[f], &l, 10); + + f += (l - &metadata[f]) + 1; + + size_align.emplace_back(size, align); + } while (true); +} + +void read_kernarg_metadata( + elfio& reader, + unordered_map>>& kernargs) +{ // TODO: this is inefficient. + auto it = find_section_if( + reader, [](const section* x) { return x->get_type() == SHT_NOTE; }); + + if (!it) return; + + const note_section_accessor acc{reader, it}; + for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { + ELFIO::Elf_Word type{}; + string name{}; + void* desc{}; + Elf_Word desc_size{}; + + acc.get_note(i, type, name, desc, desc_size); + + if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. + + string tmp{ + static_cast(desc), static_cast(desc) + desc_size}; + + auto dx = tmp.find("Kernels:"); + + if (dx == string::npos) continue; + + static constexpr decltype(tmp.size()) kernels_sz{8}; + dx += kernels_sz; + + do { + dx = tmp.find("Name:", dx); + + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) name_sz{5}; + dx = tmp.find_first_not_of(' ', dx + name_sz); + + auto fn = tmp.substr(dx, tmp.find('\n', dx) - dx); + dx += fn.size(); + + dx = tmp.find("Args:", dx); + + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) args_sz{5}; + dx = parse_args( + tmp, dx + args_sz, tmp.find("CodeProps", dx), kernargs[fn]); + } while (true); + } +} } // namespace namespace hip_impl { @@ -495,6 +574,25 @@ unordered_map& globals(bool rebuild) { return r; } +unordered_map>>& kernargs() { + static unordered_map>> r; + static once_flag f; + + call_once(f, []() { + for (auto&& blob : code_object_blobs()) { + stringstream tmp{std::string{ + blob.second.front().cbegin(), blob.second.front().cend()}}; + + elfio reader; + if (!reader.load(tmp)) continue; + + read_kernarg_metadata(reader, r); + } + }); + + return r; +} + hsa_executable_t load_executable(const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader; From 2a69dd1fb6f2022b0c05ef123501887f252959dd Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 30 Oct 2018 01:55:09 +0000 Subject: [PATCH 05/23] Handle the very confusing dual encoding of the symbol name. [ROCm/hip commit: bce3de81624d9b3d114074d87cb6defc353bd2cb] --- projects/hip/src/program_state.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 43ceedee7b..4cee70c8f2 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -398,11 +398,10 @@ void read_kernarg_metadata( if (dx == string::npos) break; static constexpr decltype(tmp.size()) name_sz{5}; - dx = tmp.find_first_not_of(' ', dx + name_sz); + dx = tmp.find_first_not_of(" '", dx + name_sz); - auto fn = tmp.substr(dx, tmp.find('\n', dx) - dx); + auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); dx += fn.size(); - dx = tmp.find("Args:", dx); if (dx == string::npos) break; @@ -590,6 +589,7 @@ unordered_map>>& kernargs() { } }); + for (auto&& x : r) std::cerr << x.first << std::endl; return r; } From d4815f4178f1755186d69dfb562cd31ce8892dbf Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 31 Oct 2018 03:22:38 +0530 Subject: [PATCH 06/23] Fixes global symbols tracking in hip_module [ROCm/hip commit: b270313129be98995fca63e32e37ea796fd62a98] --- projects/hip/src/hip_module.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index a6d486b6de..0aa171631c 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -267,7 +267,12 @@ inline void track(const Agent_global& x) { hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true, false); hc::am_memtracker_add(x.address, ptr_info); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx()); +#else hc::am_memtracker_update(x.address, device->_deviceId, 0u); +#endif + } template > From d3beba42d37d535eccd664b603f13a11f7abd2e5 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 30 Oct 2018 23:34:27 +0000 Subject: [PATCH 07/23] If we've already seen a `__global__` function we do not need to re-parse [ROCm/hip commit: f7ba987038cafd7799c5ea5f5e347b7098a39d85] --- projects/hip/src/program_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 4cee70c8f2..00d8e3a0b2 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -341,6 +341,7 @@ size_t parse_args( size_t l, vector>& size_align) { if (f == l) return f; + if (!size_align.empty()) return l; do { static constexpr size_t size_sz{5}; @@ -589,7 +590,6 @@ unordered_map>>& kernargs() { } }); - for (auto&& x : r) std::cerr << x.first << std::endl; return r; } From 1c6f47ef5504aea287e41f4bc8421fa1d73b5e57 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Mon, 5 Nov 2018 22:34:16 +0000 Subject: [PATCH 08/23] Remove non-working non-default-rounded math apis In ROCm-Device-Libs, they have dropped the non-default-rounded versions of add, sub, mul, div, sqrt and fma. Therefore, ocml has removed the rte, rtp, rtn, and rtz counterparts. This will remove the same math APIs in HIP for _ru, _rd, _rn, and _rz. [ROCm/hip commit: 2b108a2cfdbc8c1f2e0b8f59271fc7f1224dce80] --- .../include/hip/hcc_detail/math_functions.h | 336 +++++++++--------- .../hip/include/hip/hcc_detail/math_fwd.h | 50 ++- .../hipDoublePrecisionIntrinsics.cpp | 48 +-- .../hipSinglePrecisionIntrinsics.cpp | 48 +-- 4 files changed, 265 insertions(+), 217 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index b12e7aca89..8a6091858b 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -514,69 +514,69 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); } __DEVICE__ inline float __expf(float x) { return __ocml_exp_f32(x); } -__DEVICE__ -inline -float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); } -__DEVICE__ -inline -float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } -__DEVICE__ -inline -float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); } -__DEVICE__ -inline -float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } -__DEVICE__ -inline -float __fdiv_rd(float x, float y) { return x / y; } -__DEVICE__ -inline -float __fdiv_rn(float x, float y) { return x / y; } -__DEVICE__ -inline -float __fdiv_ru(float x, float y) { return x / y; } -__DEVICE__ -inline -float __fdiv_rz(float x, float y) { return x / y; } +// __DEVICE__ +// inline +// float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +// __DEVICE__ +// inline +// float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } +// __DEVICE__ +// inline +// float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } +// __DEVICE__ +// inline +// float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } +// __DEVICE__ +// inline +// float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +// __DEVICE__ +// inline +// float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } +// __DEVICE__ +// inline +// float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } +// __DEVICE__ +// inline +// float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } __DEVICE__ inline float __fdividef(float x, float y) { return x / y; } -__DEVICE__ -inline -float __fmaf_rd(float x, float y, float z) -{ - return __ocml_fma_rtp_f32(x, y, z); -} -__DEVICE__ -inline -float __fmaf_rn(float x, float y, float z) -{ - return __ocml_fma_rte_f32(x, y, z); -} -__DEVICE__ -inline -float __fmaf_ru(float x, float y, float z) -{ - return __ocml_fma_rtn_f32(x, y, z); -} -__DEVICE__ -inline -float __fmaf_rz(float x, float y, float z) -{ - return __ocml_fma_rtz_f32(x, y, z); -} -__DEVICE__ -inline -float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); } -__DEVICE__ -inline -float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } -__DEVICE__ -inline -float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); } -__DEVICE__ -inline -float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +// __DEVICE__ +// inline +// float __fmaf_rd(float x, float y, float z) +// { +// return __ocml_fma_rtn_f32(x, y, z); +// } +// __DEVICE__ +// inline +// float __fmaf_rn(float x, float y, float z) +// { +// return __ocml_fma_rte_f32(x, y, z); +// } +// __DEVICE__ +// inline +// float __fmaf_ru(float x, float y, float z) +// { +// return __ocml_fma_rtp_f32(x, y, z); +// } +// __DEVICE__ +// inline +// float __fmaf_rz(float x, float y, float z) +// { +// return __ocml_fma_rtz_f32(x, y, z); +// } +// __DEVICE__ +// inline +// float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } +// __DEVICE__ +// inline +// float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } +// __DEVICE__ +// inline +// float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +// __DEVICE__ +// inline +// float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } __DEVICE__ inline float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } @@ -592,30 +592,30 @@ float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } __DEVICE__ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } -__DEVICE__ -inline -float __fsqrt_rd(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float __fsqrt_ru(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float __fsqrt_rz(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); } -__DEVICE__ -inline -float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } -__DEVICE__ -inline -float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); } -__DEVICE__ -inline -float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +// __DEVICE__ +// inline +// float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +// __DEVICE__ +// inline +// float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } +// __DEVICE__ +// inline +// float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } +// __DEVICE__ +// inline +// float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } +// __DEVICE__ +// inline +// float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +// __DEVICE__ +// inline +// float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } +// __DEVICE__ +// inline +// float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +// __DEVICE__ +// inline +// float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } __DEVICE__ inline float __log10f(float x) { return __ocml_log10_f32(x); } @@ -1034,42 +1034,42 @@ double yn(int n, double x) } // BEGIN INTRINSICS -__DEVICE__ -inline -double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); } -__DEVICE__ -inline -double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } -__DEVICE__ -inline -double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); } -__DEVICE__ -inline -double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } -__DEVICE__ -inline -double __ddiv_rd(double x, double y) { return x / y; } -__DEVICE__ -inline -double __ddiv_rn(double x, double y) { return x / y; } -__DEVICE__ -inline -double __ddiv_ru(double x, double y) { return x / y; } -__DEVICE__ -inline -double __ddiv_rz(double x, double y) { return x / y; } -__DEVICE__ -inline -double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); } -__DEVICE__ -inline -double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } -__DEVICE__ -inline -double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); } -__DEVICE__ -inline -double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +// __DEVICE__ +// inline +// double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +// __DEVICE__ +// inline +// double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } +// __DEVICE__ +// inline +// double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } +// __DEVICE__ +// inline +// double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } +// __DEVICE__ +// inline +// double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +// __DEVICE__ +// inline +// double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } +// __DEVICE__ +// inline +// double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } +// __DEVICE__ +// inline +// double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } +// __DEVICE__ +// inline +// double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +// __DEVICE__ +// inline +// double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } +// __DEVICE__ +// inline +// double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +// __DEVICE__ +// inline +// double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } __DEVICE__ inline double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } @@ -1082,54 +1082,54 @@ double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } -__DEVICE__ -inline -double __dsqrt_rd(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double __dsqrt_ru(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double __dsqrt_rz(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); } -__DEVICE__ -inline -double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } -__DEVICE__ -inline -double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); } -__DEVICE__ -inline -double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } -__DEVICE__ -inline -double __fma_rd(double x, double y, double z) -{ - return __ocml_fma_rtp_f64(x, y, z); -} -__DEVICE__ -inline -double __fma_rn(double x, double y, double z) -{ - return __ocml_fma_rte_f64(x, y, z); -} -__DEVICE__ -inline -double __fma_ru(double x, double y, double z) -{ - return __ocml_fma_rtn_f64(x, y, z); -} -__DEVICE__ -inline -double __fma_rz(double x, double y, double z) -{ - return __ocml_fma_rtz_f64(x, y, z); -} +// __DEVICE__ +// inline +// double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } +// __DEVICE__ +// inline +// double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } +// __DEVICE__ +// inline +// double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } +// __DEVICE__ +// inline +// double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } +// __DEVICE__ +// inline +// double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +// __DEVICE__ +// inline +// double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } +// __DEVICE__ +// inline +// double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +// __DEVICE__ +// inline +// double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } +// __DEVICE__ +// inline +// double __fma_rd(double x, double y, double z) +// { +// return __ocml_fma_rtn_f64(x, y, z); +// } +// __DEVICE__ +// inline +// double __fma_rn(double x, double y, double z) +// { +// return __ocml_fma_rte_f64(x, y, z); +// } +// __DEVICE__ +// inline +// double __fma_ru(double x, double y, double z) +// { +// return __ocml_fma_rtp_f64(x, y, z); +// } +// __DEVICE__ +// inline +// double __fma_rz(double x, double y, double z) +// { +// return __ocml_fma_rtz_f64(x, y, z); +// } // END INTRINSICS // END DOUBLE diff --git a/projects/hip/include/hip/hcc_detail/math_fwd.h b/projects/hip/include/hip/hcc_detail/math_fwd.h index 404c2f81d5..e5594924ba 100644 --- a/projects/hip/include/hip/hcc_detail/math_fwd.h +++ b/projects/hip/include/hip/hcc_detail/math_fwd.h @@ -288,6 +288,30 @@ __attribute__((const)) float __ocml_mul_rtz_f32(float, float); __device__ __attribute__((const)) +float __ocml_div_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtz_f32(float, float); +__device__ +__attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) @@ -572,6 +596,30 @@ __attribute__((const)) double __ocml_mul_rtz_f64(double, double); __device__ __attribute__((const)) +double __ocml_div_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtz_f64(double, double); +__device__ +__attribute__((const)) double __ocml_fma_rte_f64(double, double, double); __device__ __attribute__((const)) @@ -594,4 +642,4 @@ double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64"); #if defined(__cplusplus) } // extern "C" -#endif \ No newline at end of file +#endif diff --git a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index 17cd82c9ab..f6c515c03a 100644 --- a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -34,34 +34,34 @@ THE SOFTWARE. #pragma clang diagnostic ignored "-Wunused-variable" __device__ void double_precision_intrinsics() { - __dadd_rd(0.0, 1.0); - __dadd_rn(0.0, 1.0); - __dadd_ru(0.0, 1.0); - __dadd_rz(0.0, 1.0); - __ddiv_rd(0.0, 1.0); - __ddiv_rn(0.0, 1.0); - __ddiv_ru(0.0, 1.0); - __ddiv_rz(0.0, 1.0); - __dmul_rd(1.0, 2.0); - __dmul_rn(1.0, 2.0); - __dmul_ru(1.0, 2.0); - __dmul_rz(1.0, 2.0); + // __dadd_rd(0.0, 1.0); + // __dadd_rn(0.0, 1.0); + // __dadd_ru(0.0, 1.0); + // __dadd_rz(0.0, 1.0); + // __ddiv_rd(0.0, 1.0); + // __ddiv_rn(0.0, 1.0); + // __ddiv_ru(0.0, 1.0); + // __ddiv_rz(0.0, 1.0); + // __dmul_rd(1.0, 2.0); + // __dmul_rn(1.0, 2.0); + // __dmul_ru(1.0, 2.0); + // __dmul_rz(1.0, 2.0); __drcp_rd(2.0); __drcp_rn(2.0); __drcp_ru(2.0); __drcp_rz(2.0); - __dsqrt_rd(4.0); - __dsqrt_rn(4.0); - __dsqrt_ru(4.0); - __dsqrt_rz(4.0); - __dsub_rd(2.0, 1.0); - __dsub_rn(2.0, 1.0); - __dsub_ru(2.0, 1.0); - __dsub_rz(2.0, 1.0); - __fma_rd(1.0, 2.0, 3.0); - __fma_rn(1.0, 2.0, 3.0); - __fma_ru(1.0, 2.0, 3.0); - __fma_rz(1.0, 2.0, 3.0); + // __dsqrt_rd(4.0); + // __dsqrt_rn(4.0); + // __dsqrt_ru(4.0); + // __dsqrt_rz(4.0); + // __dsub_rd(2.0, 1.0); + // __dsub_rn(2.0, 1.0); + // __dsub_ru(2.0, 1.0); + // __dsub_rz(2.0, 1.0); + // __fma_rd(1.0, 2.0, 3.0); + // __fma_rn(1.0, 2.0, 3.0); + // __fma_ru(1.0, 2.0, 3.0); + // __fma_rz(1.0, 2.0, 3.0); } __global__ void compileDoublePrecisionIntrinsics(int ignored) { diff --git a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index f3d2a36931..623ea08a94 100644 --- a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -39,36 +39,36 @@ __device__ void single_precision_intrinsics() { __cosf(0.0f); __exp10f(0.0f); __expf(0.0f); - __fadd_rd(0.0f, 1.0f); - __fadd_rn(0.0f, 1.0f); - __fadd_ru(0.0f, 1.0f); - __fadd_rz(0.0f, 1.0f); - __fdiv_rd(4.0f, 2.0f); - __fdiv_rn(4.0f, 2.0f); - __fdiv_ru(4.0f, 2.0f); - __fdiv_rz(4.0f, 2.0f); + // __fadd_rd(0.0f, 1.0f); + // __fadd_rn(0.0f, 1.0f); + // __fadd_ru(0.0f, 1.0f); + // __fadd_rz(0.0f, 1.0f); + // __fdiv_rd(4.0f, 2.0f); + // __fdiv_rn(4.0f, 2.0f); + // __fdiv_ru(4.0f, 2.0f); + // __fdiv_rz(4.0f, 2.0f); __fdividef(4.0f, 2.0f); - __fmaf_rd(1.0f, 2.0f, 3.0f); - __fmaf_rn(1.0f, 2.0f, 3.0f); - __fmaf_ru(1.0f, 2.0f, 3.0f); - __fmaf_rz(1.0f, 2.0f, 3.0f); - __fmul_rd(1.0f, 2.0f); - __fmul_rn(1.0f, 2.0f); - __fmul_ru(1.0f, 2.0f); - __fmul_rz(1.0f, 2.0f); + // __fmaf_rd(1.0f, 2.0f, 3.0f); + // __fmaf_rn(1.0f, 2.0f, 3.0f); + // __fmaf_ru(1.0f, 2.0f, 3.0f); + // __fmaf_rz(1.0f, 2.0f, 3.0f); + // __fmul_rd(1.0f, 2.0f); + // __fmul_rn(1.0f, 2.0f); + // __fmul_ru(1.0f, 2.0f); + // __fmul_rz(1.0f, 2.0f); __frcp_rd(2.0f); __frcp_rn(2.0f); __frcp_ru(2.0f); __frcp_rz(2.0f); __frsqrt_rn(4.0f); - __fsqrt_rd(4.0f); - __fsqrt_rn(4.0f); - __fsqrt_ru(4.0f); - __fsqrt_rz(4.0f); - __fsub_rd(2.0f, 1.0f); - __fsub_rn(2.0f, 1.0f); - __fsub_ru(2.0f, 1.0f); - __fsub_rz(2.0f, 1.0f); + // __fsqrt_rd(4.0f); + // __fsqrt_rn(4.0f); + // __fsqrt_ru(4.0f); + // __fsqrt_rz(4.0f); + // __fsub_rd(2.0f, 1.0f); + // __fsub_rn(2.0f, 1.0f); + // __fsub_ru(2.0f, 1.0f); + // __fsub_rz(2.0f, 1.0f); __log10f(1.0f); __log2f(1.0f); __logf(1.0f); From 914506055ee9223d4c670e6a91ee168dc2c5e8c8 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Mon, 5 Nov 2018 22:54:55 +0000 Subject: [PATCH 09/23] Update hip-math-api doc to remove non-default-rounded [ROCm/hip commit: 7223277aa35544b89fceeb66cd94b16e8ea82156] --- projects/hip/docs/markdown/hip-math-api.md | 102 +++++++++++---------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/projects/hip/docs/markdown/hip-math-api.md b/projects/hip/docs/markdown/hip-math-api.md index 37efafbbbf..b3698ff2b3 100644 --- a/projects/hip/docs/markdown/hip-math-api.md +++ b/projects/hip/docs/markdown/hip-math-api.md @@ -1433,7 +1433,7 @@ __device__ float __expf(float x); __device__ static float __fadd_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_rn @@ -1441,7 +1441,7 @@ __device__ static float __fadd_rd(float x, float y); __device__ static float __fadd_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_ru @@ -1449,7 +1449,7 @@ __device__ static float __fadd_rn(float x, float y); __device__ static float __fadd_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_rz @@ -1457,7 +1457,7 @@ __device__ static float __fadd_ru(float x, float y); __device__ static float __fadd_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rd @@ -1465,7 +1465,7 @@ __device__ static float __fadd_rz(float x, float y); __device__ static float __fdiv_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rn @@ -1473,7 +1473,7 @@ __device__ static float __fdiv_rd(float x, float y); __device__ static float __fdiv_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_ru @@ -1481,7 +1481,7 @@ __device__ static float __fdiv_rn(float x, float y); __device__ static float __fdiv_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rz @@ -1489,7 +1489,7 @@ __device__ static float __fdiv_ru(float x, float y); __device__ static float __fdiv_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdividef @@ -1505,7 +1505,7 @@ __device__ static float __fdividef(float x, float y); __device__ float __fmaf_rd(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_rn @@ -1513,7 +1513,7 @@ __device__ float __fmaf_rd(float x, float y, float z); __device__ float __fmaf_rn(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_ru @@ -1521,7 +1521,7 @@ __device__ float __fmaf_rn(float x, float y, float z); __device__ float __fmaf_ru(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_rz @@ -1529,7 +1529,7 @@ __device__ float __fmaf_ru(float x, float y, float z); __device__ float __fmaf_rz(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rd @@ -1537,7 +1537,7 @@ __device__ float __fmaf_rz(float x, float y, float z); __device__ static float __fmul_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rn @@ -1545,7 +1545,7 @@ __device__ static float __fmul_rd(float x, float y); __device__ static float __fmul_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_ru @@ -1553,7 +1553,7 @@ __device__ static float __fmul_rn(float x, float y); __device__ static float __fmul_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rz @@ -1561,7 +1561,7 @@ __device__ static float __fmul_ru(float x, float y); __device__ static float __fmul_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rd @@ -1609,7 +1609,7 @@ __device__ float __frsqrt_rn(float x); __device__ float __fsqrt_rd(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rn @@ -1617,7 +1617,7 @@ __device__ float __fsqrt_rd(float x); __device__ float __fsqrt_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_ru @@ -1625,7 +1625,7 @@ __device__ float __fsqrt_rn(float x); __device__ float __fsqrt_ru(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rz @@ -1633,7 +1633,7 @@ __device__ float __fsqrt_ru(float x); __device__ float __fsqrt_rz(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_rd @@ -1641,7 +1641,7 @@ __device__ float __fsqrt_rz(float x); __device__ static float __fsub_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_rn @@ -1649,7 +1649,7 @@ __device__ static float __fsub_rd(float x, float y); __device__ static float __fsub_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_ru @@ -1657,7 +1657,15 @@ __device__ static float __fsub_rn(float x, float y); __device__ static float __fsub_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported + + +### __fsub_rz +```cpp +__device__ static float __fsub_rz(float x, float y); + +``` +**Description:** Unsupported ### __log10f @@ -1729,7 +1737,7 @@ __device__ float __tanf(float x); __device__ static double __dadd_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_rn @@ -1737,7 +1745,7 @@ __device__ static double __dadd_rd(double x, double y); __device__ static double __dadd_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_ru @@ -1745,7 +1753,7 @@ __device__ static double __dadd_rn(double x, double y); __device__ static double __dadd_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_rz @@ -1753,7 +1761,7 @@ __device__ static double __dadd_ru(double x, double y); __device__ static double __dadd_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rd @@ -1761,7 +1769,7 @@ __device__ static double __dadd_rz(double x, double y); __device__ static double __ddiv_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rn @@ -1769,7 +1777,7 @@ __device__ static double __ddiv_rd(double x, double y); __device__ static double __ddiv_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_ru @@ -1777,7 +1785,7 @@ __device__ static double __ddiv_rn(double x, double y); __device__ static double __ddiv_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rz @@ -1785,7 +1793,7 @@ __device__ static double __ddiv_ru(double x, double y); __device__ static double __ddiv_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rd @@ -1793,7 +1801,7 @@ __device__ static double __ddiv_rz(double x, double y); __device__ static double __dmul_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rn @@ -1801,7 +1809,7 @@ __device__ static double __dmul_rd(double x, double y); __device__ static double __dmul_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_ru @@ -1809,7 +1817,7 @@ __device__ static double __dmul_rn(double x, double y); __device__ static double __dmul_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rz @@ -1817,7 +1825,7 @@ __device__ static double __dmul_ru(double x, double y); __device__ static double __dmul_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rd @@ -1857,7 +1865,7 @@ __device__ double __drcp_rz(double x); __device__ double __dsqrt_rd(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rn @@ -1865,7 +1873,7 @@ __device__ double __dsqrt_rd(double x); __device__ double __dsqrt_rn(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_ru @@ -1873,7 +1881,7 @@ __device__ double __dsqrt_rn(double x); __device__ double __dsqrt_ru(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rz @@ -1881,7 +1889,7 @@ __device__ double __dsqrt_ru(double x); __device__ double __dsqrt_rz(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rd @@ -1889,7 +1897,7 @@ __device__ double __dsqrt_rz(double x); __device__ static double __dsub_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rn @@ -1897,7 +1905,7 @@ __device__ static double __dsub_rd(double x, double y); __device__ static double __dsub_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_ru @@ -1905,7 +1913,7 @@ __device__ static double __dsub_rn(double x, double y); __device__ static double __dsub_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rz @@ -1913,7 +1921,7 @@ __device__ static double __dsub_ru(double x, double y); __device__ static double __dsub_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rd @@ -1921,7 +1929,7 @@ __device__ static double __dsub_rz(double x, double y); __device__ double __fma_rd(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rn @@ -1929,7 +1937,7 @@ __device__ double __fma_rd(double x, double y, double z); __device__ double __fma_rn(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_ru @@ -1937,7 +1945,7 @@ __device__ double __fma_rn(double x, double y, double z); __device__ double __fma_ru(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rz @@ -1945,7 +1953,7 @@ __device__ double __fma_ru(double x, double y, double z); __device__ double __fma_rz(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __brev From eb4053e3fecaeb5dd3952dfaafc93957d08038c2 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 6 Nov 2018 16:32:14 +0000 Subject: [PATCH 10/23] Guard the OCML rounded operations instead Instead of commenting all these functions out, guard the functions with a macro OCML_BASIC_ROUNDED_OPERATIONS. [ROCm/hip commit: e59c33250ab9aa4195e14f89aac3779be2f34bcd] --- .../include/hip/hcc_detail/math_functions.h | 346 +++++++++--------- .../hipDoublePrecisionIntrinsics.cpp | 52 +-- .../hipSinglePrecisionIntrinsics.cpp | 54 +-- 3 files changed, 236 insertions(+), 216 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index 8a6091858b..08be321d68 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -514,69 +514,73 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); } __DEVICE__ inline float __expf(float x) { return __ocml_exp_f32(x); } -// __DEVICE__ -// inline -// float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +__DEVICE__ +inline +float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } +__DEVICE__ +inline +float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } +__DEVICE__ +inline +float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } +__DEVICE__ +inline +float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#endif __DEVICE__ inline float __fdividef(float x, float y) { return x / y; } -// __DEVICE__ -// inline -// float __fmaf_rd(float x, float y, float z) -// { -// return __ocml_fma_rtn_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_rn(float x, float y, float z) -// { -// return __ocml_fma_rte_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_ru(float x, float y, float z) -// { -// return __ocml_fma_rtp_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmaf_rz(float x, float y, float z) -// { -// return __ocml_fma_rtz_f32(x, y, z); -// } -// __DEVICE__ -// inline -// float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_rd(float x, float y, float z) +{ + return __ocml_fma_rtn_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rn(float x, float y, float z) +{ + return __ocml_fma_rte_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_ru(float x, float y, float z) +{ + return __ocml_fma_rtp_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rz(float x, float y, float z) +{ + return __ocml_fma_rtz_f32(x, y, z); +} +__DEVICE__ +inline +float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } +__DEVICE__ +inline +float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } +__DEVICE__ +inline +float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +__DEVICE__ +inline +float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +#endif __DEVICE__ inline float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } @@ -592,30 +596,32 @@ float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } __DEVICE__ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } -// __DEVICE__ -// inline -// float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } -// __DEVICE__ -// inline -// float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } -// __DEVICE__ -// inline -// float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +__DEVICE__ +inline +float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } +__DEVICE__ +inline +float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } +__DEVICE__ +inline +float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } +__DEVICE__ +inline +float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +__DEVICE__ +inline +float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } +__DEVICE__ +inline +float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +__DEVICE__ +inline +float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#endif __DEVICE__ inline float __log10f(float x) { return __ocml_log10_f32(x); } @@ -1034,42 +1040,44 @@ double yn(int n, double x) } // BEGIN INTRINSICS -// __DEVICE__ -// inline -// double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +__DEVICE__ +inline +double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } +__DEVICE__ +inline +double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } +__DEVICE__ +inline +double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } +__DEVICE__ +inline +double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } +__DEVICE__ +inline +double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +__DEVICE__ +inline +double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } +__DEVICE__ +inline +double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +__DEVICE__ +inline +double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +#endif __DEVICE__ inline double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } @@ -1082,54 +1090,56 @@ double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } -// __DEVICE__ -// inline -// double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } -// __DEVICE__ -// inline -// double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } -// __DEVICE__ -// inline -// double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } -// __DEVICE__ -// inline -// double __fma_rd(double x, double y, double z) -// { -// return __ocml_fma_rtn_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_rn(double x, double y, double z) -// { -// return __ocml_fma_rte_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_ru(double x, double y, double z) -// { -// return __ocml_fma_rtp_f64(x, y, z); -// } -// __DEVICE__ -// inline -// double __fma_rz(double x, double y, double z) -// { -// return __ocml_fma_rtz_f64(x, y, z); -// } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } +__DEVICE__ +inline +double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } +__DEVICE__ +inline +double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } +__DEVICE__ +inline +double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } +__DEVICE__ +inline +double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +__DEVICE__ +inline +double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } +__DEVICE__ +inline +double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +__DEVICE__ +inline +double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } +__DEVICE__ +inline +double __fma_rd(double x, double y, double z) +{ + return __ocml_fma_rtn_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rn(double x, double y, double z) +{ + return __ocml_fma_rte_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_ru(double x, double y, double z) +{ + return __ocml_fma_rtp_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rz(double x, double y, double z) +{ + return __ocml_fma_rtz_f64(x, y, z); +} +#endif // END INTRINSICS // END DOUBLE diff --git a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index f6c515c03a..295fd83708 100644 --- a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -34,34 +34,38 @@ THE SOFTWARE. #pragma clang diagnostic ignored "-Wunused-variable" __device__ void double_precision_intrinsics() { - // __dadd_rd(0.0, 1.0); - // __dadd_rn(0.0, 1.0); - // __dadd_ru(0.0, 1.0); - // __dadd_rz(0.0, 1.0); - // __ddiv_rd(0.0, 1.0); - // __ddiv_rn(0.0, 1.0); - // __ddiv_ru(0.0, 1.0); - // __ddiv_rz(0.0, 1.0); - // __dmul_rd(1.0, 2.0); - // __dmul_rn(1.0, 2.0); - // __dmul_ru(1.0, 2.0); - // __dmul_rz(1.0, 2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_rd(0.0, 1.0); + __dadd_rn(0.0, 1.0); + __dadd_ru(0.0, 1.0); + __dadd_rz(0.0, 1.0); + __ddiv_rd(0.0, 1.0); + __ddiv_rn(0.0, 1.0); + __ddiv_ru(0.0, 1.0); + __ddiv_rz(0.0, 1.0); + __dmul_rd(1.0, 2.0); + __dmul_rn(1.0, 2.0); + __dmul_ru(1.0, 2.0); + __dmul_rz(1.0, 2.0); +#endif __drcp_rd(2.0); __drcp_rn(2.0); __drcp_ru(2.0); __drcp_rz(2.0); - // __dsqrt_rd(4.0); - // __dsqrt_rn(4.0); - // __dsqrt_ru(4.0); - // __dsqrt_rz(4.0); - // __dsub_rd(2.0, 1.0); - // __dsub_rn(2.0, 1.0); - // __dsub_ru(2.0, 1.0); - // __dsub_rz(2.0, 1.0); - // __fma_rd(1.0, 2.0, 3.0); - // __fma_rn(1.0, 2.0, 3.0); - // __fma_ru(1.0, 2.0, 3.0); - // __fma_rz(1.0, 2.0, 3.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsqrt_rd(4.0); + __dsqrt_rn(4.0); + __dsqrt_ru(4.0); + __dsqrt_rz(4.0); + __dsub_rd(2.0, 1.0); + __dsub_rn(2.0, 1.0); + __dsub_ru(2.0, 1.0); + __dsub_rz(2.0, 1.0); + __fma_rd(1.0, 2.0, 3.0); + __fma_rn(1.0, 2.0, 3.0); + __fma_ru(1.0, 2.0, 3.0); + __fma_rz(1.0, 2.0, 3.0); +#endif } __global__ void compileDoublePrecisionIntrinsics(int ignored) { diff --git a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index 623ea08a94..db60099558 100644 --- a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -39,36 +39,42 @@ __device__ void single_precision_intrinsics() { __cosf(0.0f); __exp10f(0.0f); __expf(0.0f); - // __fadd_rd(0.0f, 1.0f); - // __fadd_rn(0.0f, 1.0f); - // __fadd_ru(0.0f, 1.0f); - // __fadd_rz(0.0f, 1.0f); - // __fdiv_rd(4.0f, 2.0f); - // __fdiv_rn(4.0f, 2.0f); - // __fdiv_ru(4.0f, 2.0f); - // __fdiv_rz(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_rd(0.0f, 1.0f); + __fadd_rn(0.0f, 1.0f); + __fadd_ru(0.0f, 1.0f); + __fadd_rz(0.0f, 1.0f); + __fdiv_rd(4.0f, 2.0f); + __fdiv_rn(4.0f, 2.0f); + __fdiv_ru(4.0f, 2.0f); + __fdiv_rz(4.0f, 2.0f); +#endif __fdividef(4.0f, 2.0f); - // __fmaf_rd(1.0f, 2.0f, 3.0f); - // __fmaf_rn(1.0f, 2.0f, 3.0f); - // __fmaf_ru(1.0f, 2.0f, 3.0f); - // __fmaf_rz(1.0f, 2.0f, 3.0f); - // __fmul_rd(1.0f, 2.0f); - // __fmul_rn(1.0f, 2.0f); - // __fmul_ru(1.0f, 2.0f); - // __fmul_rz(1.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_rd(1.0f, 2.0f, 3.0f); + __fmaf_rn(1.0f, 2.0f, 3.0f); + __fmaf_ru(1.0f, 2.0f, 3.0f); + __fmaf_rz(1.0f, 2.0f, 3.0f); + __fmul_rd(1.0f, 2.0f); + __fmul_rn(1.0f, 2.0f); + __fmul_ru(1.0f, 2.0f); + __fmul_rz(1.0f, 2.0f); +#endif __frcp_rd(2.0f); __frcp_rn(2.0f); __frcp_ru(2.0f); __frcp_rz(2.0f); __frsqrt_rn(4.0f); - // __fsqrt_rd(4.0f); - // __fsqrt_rn(4.0f); - // __fsqrt_ru(4.0f); - // __fsqrt_rz(4.0f); - // __fsub_rd(2.0f, 1.0f); - // __fsub_rn(2.0f, 1.0f); - // __fsub_ru(2.0f, 1.0f); - // __fsub_rz(2.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_rd(4.0f); + __fsqrt_rn(4.0f); + __fsqrt_ru(4.0f); + __fsqrt_rz(4.0f); + __fsub_rd(2.0f, 1.0f); + __fsub_rn(2.0f, 1.0f); + __fsub_ru(2.0f, 1.0f); + __fsub_rz(2.0f, 1.0f); +#endif __log10f(1.0f); __log2f(1.0f); __logf(1.0f); From 8bf242966383755b079fe27d3c37abfe95220594 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 6 Nov 2018 19:53:28 +0000 Subject: [PATCH 11/23] Guard rcp rounded implementation as well Since rcp implementations of non-default rounded versions are not correct or supported in OCML, guard them using the same macro OCML_BASIC_ROUNDED_OPERATIONS. Also update the docs and tests. [ROCm/hip commit: 0cfaa52d15575822a62472d30166714094f0cb29] --- projects/hip/docs/markdown/hip-math-api.md | 18 +++++++++--------- .../include/hip/hcc_detail/math_functions.h | 4 ---- .../deviceLib/hipDoublePrecisionIntrinsics.cpp | 2 -- .../hip/tests/src/deviceLib/hipFloatMath.cpp | 10 ++++++---- .../deviceLib/hipSinglePrecisionIntrinsics.cpp | 2 -- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/projects/hip/docs/markdown/hip-math-api.md b/projects/hip/docs/markdown/hip-math-api.md index b3698ff2b3..9b8a3f2f11 100644 --- a/projects/hip/docs/markdown/hip-math-api.md +++ b/projects/hip/docs/markdown/hip-math-api.md @@ -1569,7 +1569,7 @@ __device__ static float __fmul_rz(float x, float y); __device__ float __frcp_rd(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rn @@ -1577,7 +1577,7 @@ __device__ float __frcp_rd(float x); __device__ float __frcp_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_ru @@ -1585,7 +1585,7 @@ __device__ float __frcp_rn(float x); __device__ float __frcp_ru(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rz @@ -1593,7 +1593,7 @@ __device__ float __frcp_ru(float x); __device__ float __frcp_rz(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frsqrt_rn @@ -1601,7 +1601,7 @@ __device__ float __frcp_rz(float x); __device__ float __frsqrt_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rd @@ -1833,7 +1833,7 @@ __device__ static double __dmul_rz(double x, double y); __device__ double __drcp_rd(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rn @@ -1841,7 +1841,7 @@ __device__ double __drcp_rd(double x); __device__ double __drcp_rn(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_ru @@ -1849,7 +1849,7 @@ __device__ double __drcp_rn(double x); __device__ double __drcp_ru(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rz @@ -1857,7 +1857,7 @@ __device__ double __drcp_ru(double x); __device__ double __drcp_rz(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rd diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index 08be321d68..557257b2b0 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -580,7 +580,6 @@ float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } __DEVICE__ inline float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } -#endif __DEVICE__ inline float __frcp_rd(float x) { return __llvm_amdgcn_rcp_f32(x); } @@ -596,7 +595,6 @@ float __frcp_rz(float x) { return __llvm_amdgcn_rcp_f32(x); } __DEVICE__ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } @@ -1077,7 +1075,6 @@ double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } __DEVICE__ inline double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } -#endif __DEVICE__ inline double __drcp_rd(double x) { return __llvm_amdgcn_rcp_f64(x); } @@ -1090,7 +1087,6 @@ double __drcp_ru(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } diff --git a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index 295fd83708..939bdae743 100644 --- a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -47,12 +47,10 @@ __device__ void double_precision_intrinsics() { __dmul_rn(1.0, 2.0); __dmul_ru(1.0, 2.0); __dmul_rz(1.0, 2.0); -#endif __drcp_rd(2.0); __drcp_rn(2.0); __drcp_ru(2.0); __drcp_rz(2.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS __dsqrt_rd(4.0); __dsqrt_rn(4.0); __dsqrt_ru(4.0); diff --git a/projects/hip/tests/src/deviceLib/hipFloatMath.cpp b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp index ee83309f28..c6a07e26a9 100644 --- a/projects/hip/tests/src/deviceLib/hipFloatMath.cpp +++ b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp @@ -38,11 +38,13 @@ __global__ void floatMath(float* In, float* Out) { Out[tid] = __cosf(In[tid]); Out[tid] = __exp10f(Out[tid]); Out[tid] = __expf(Out[tid]); +#if defined OCML_BASIC_ROUNDED_OPERATIONS Out[tid] = __frsqrt_rn(Out[tid]); - //Out[tid] = __fsqrt_rd(Out[tid]); - //Out[tid] = __fsqrt_rn(Out[tid]); - //Out[tid] = __fsqrt_ru(Out[tid]); - //Out[tid] = __fsqrt_rz(Out[tid]); + Out[tid] = __fsqrt_rd(Out[tid]); + Out[tid] = __fsqrt_rn(Out[tid]); + Out[tid] = __fsqrt_ru(Out[tid]); + Out[tid] = __fsqrt_rz(Out[tid]); +#endif Out[tid] = __log10f(Out[tid]); Out[tid] = __log2f(Out[tid]); Out[tid] = __logf(Out[tid]); diff --git a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index db60099558..b216b3cb54 100644 --- a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -59,13 +59,11 @@ __device__ void single_precision_intrinsics() { __fmul_rn(1.0f, 2.0f); __fmul_ru(1.0f, 2.0f); __fmul_rz(1.0f, 2.0f); -#endif __frcp_rd(2.0f); __frcp_rn(2.0f); __frcp_ru(2.0f); __frcp_rz(2.0f); __frsqrt_rn(4.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS __fsqrt_rd(4.0f); __fsqrt_rn(4.0f); __fsqrt_ru(4.0f); From aaf12143ae789f7122b6c42944c2918615541d14 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Wed, 31 Oct 2018 14:09:59 -0400 Subject: [PATCH 12/23] Add more checks for fatbin [ROCm/hip commit: a31b6b78d56c0e65819250912ced6cad471858e1] --- projects/hip/src/hip_clang.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/hip/src/hip_clang.cpp b/projects/hip/src/hip_clang.cpp index cfd75df562..44080884e7 100644 --- a/projects/hip/src/hip_clang.cpp +++ b/projects/hip/src/hip_clang.cpp @@ -165,11 +165,13 @@ extern "C" void __hipRegisterFunction( assert(modules && modules->size() >= g_deviceCnt); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { hipFunction_t function; - if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) { + if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && + function != nullptr) { functions[deviceId] = function; } else { - tprintf(DB_FB, "missing kernel %s for device %d\n", deviceName, deviceId); + tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + " device %d\n", deviceName, deviceId); } } @@ -249,9 +251,11 @@ hipError_t hipLaunchByPtr(const void *hostFunction) hipError_t e = hipSuccess; decltype(g_functions)::iterator it; - if ((it = g_functions.find(hostFunction)) == g_functions.end()) { + if ((it = g_functions.find(hostFunction)) == g_functions.end() || + !it->second[deviceId]) { e = hipErrorUnknown; - fprintf(stderr, "kernel %p not found!\n", hostFunction); + fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p" + " for device %d!\n", hostFunction, deviceId); abort(); } else { size_t size = exec._arguments.size(); From a7f7a01513d424d7c5c8eddd859ccc0d79d7c729 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Thu, 8 Nov 2018 11:28:47 -0500 Subject: [PATCH 13/23] Let hipcc handle clang-offload-bundler file in obj format for hip-clang [ROCm/hip commit: 3d51a1fb0105e2f2312d2523c20e0034339f6ada] --- projects/hip/bin/hipcc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index cea6211a87..68e4a96721 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -498,6 +498,10 @@ foreach $arg (@ARGV) $obj = "$tmpdir/$obj"; my $fileType = `file $obj`; my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/); + if ($fileType =~ m/ELF/) { + my $sections = `readelf -e -W $obj`; + $isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/); + } $allIsObj = ($allIsObj and $isObj); if ($isObj) { $realObjs = ($realObjs . " " . $obj); From 591d4d1b5ff6d147ac293a3527dc252b868dd5b8 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 12 Nov 2018 00:32:35 +0000 Subject: [PATCH 14/23] Handle (odd) corner case of argumentless __global__ function. [ROCm/hip commit: c0bd1a5af8636d0c5d3fa52fcd20d8676c25ff39] --- projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index ba9929c0a6..5edddad6c5 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -102,6 +102,8 @@ inline std::vector make_kernarg( static_assert(sizeof...(Formals) == sizeof...(Actuals), "The count of formal arguments must match the count of actuals."); + if (sizeof...(Formals) == 0) return {}; + const auto it = function_names().find( reinterpret_cast(kernel)); From 51c47fcc2e781ee0c6ce2bc58081f93fb7cc09db Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 12 Nov 2018 01:51:59 +0000 Subject: [PATCH 15/23] Missing handling nullary `__global__` functions for mixed arity cases. [ROCm/hip commit: 4ebc229b9ad496d3974641273eba4a5cb8a7af72] --- projects/hip/src/program_state.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 97e9035e0d..7e42a44245 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -409,13 +409,18 @@ void read_kernarg_metadata( auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); dx += fn.size(); + + auto dx1 = tmp.find("CodeProps", dx); dx = tmp.find("Args:", dx); + if (dx1 < dx) { + dx = dx1; + continue; + } if (dx == string::npos) break; static constexpr decltype(tmp.size()) args_sz{5}; - dx = parse_args( - tmp, dx + args_sz, tmp.find("CodeProps", dx), kernargs[fn]); + dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); } while (true); } } From ecea878072b8219cb6e014d657e6ba7b445a83f1 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 13 Nov 2018 00:49:20 +0530 Subject: [PATCH 16/23] Fixed hipMemcpyToSymbol doesn't work on GPU other than device 0 SWDEV-166881 [ROCm/hip commit: 11e7ab8879952da0dec0e29976398da7850b47b0] --- projects/hip/src/hip_memory.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index 4ea5b24f43..7c25b714f8 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -985,10 +985,9 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost || + if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault || kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) { - stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind); - // acc.memcpy_symbol(dst, (void*)src, count+offset); + stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false); } else { return ihipLogStatus(hipErrorInvalidValue); } @@ -1018,9 +1017,9 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost || + if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost || kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) { - stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind); + stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false); } else { return ihipLogStatus(hipErrorInvalidValue); } @@ -1052,7 +1051,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ if (stream) { try { - stream->lockedSymbolCopyAsync(acc, dst, (void*)src, count, offset, kind); + hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream); } catch (ihipException& ex) { e = ex._code; } @@ -1088,7 +1087,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co stream = ihipSyncAndResolveStream(stream); if (stream) { try { - stream->lockedSymbolCopyAsync(acc, dst, src, count, offset, kind); + hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream); } catch (ihipException& ex) { e = ex._code; } From a30829a7bf1f8fce2aa5e519f6ca6623691ab602 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Sun, 11 Nov 2018 22:51:28 -0500 Subject: [PATCH 17/23] Define __hip_device_heap in header for hip-clang only [ROCm/hip commit: 17ac81b69eb8cdd74ce5ea660d7f83f9120b6b0c] --- projects/hip/include/hip/hcc_detail/hip_memory.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/projects/hip/include/hip/hcc_detail/hip_memory.h b/projects/hip/include/hip/hcc_detail/hip_memory.h index 2c9ec1b7c3..866b9e879e 100644 --- a/projects/hip/include/hip/hcc_detail/hip_memory.h +++ b/projects/hip/include/hip/hcc_detail/hip_memory.h @@ -41,8 +41,14 @@ THE SOFTWARE. #define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) +#if __HIP__ && __HIP_DEVICE_COMPILE__ +__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; +__attribute__((weak)) __device__ + uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; +#else extern __device__ char __hip_device_heap[]; extern __device__ uint32_t __hip_device_page_flag[]; +#endif extern "C" inline __device__ void* __hip_malloc(size_t size) { char* heap = (char*)__hip_device_heap; From 69080f3cb411a586a71b40777ac3c495259984bb Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Sun, 11 Nov 2018 15:49:34 -0500 Subject: [PATCH 18/23] Fix sample bit_extract for hip-clang [ROCm/hip commit: bc40ddabe0b503135ee74c9ba0fa4242885ca57e] --- projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp index ab7a4b35a6..5c3907f0d3 100644 --- a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp @@ -23,10 +23,6 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" -#ifdef __HIP_PLATFORM_HCC__ -#include -#endif - #define CHECK(cmd) \ { \ @@ -44,7 +40,7 @@ __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) for (size_t i = offset; i < N; i += stride) { #ifdef __HIP_PLATFORM_HCC__ - C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4); + C_d[i] = __bitextract_u32(A_d[i], 8, 4); #else /* defined __HIP_PLATFORM_NVCC__ or other path */ C_d[i] = ((A_d[i] & 0xf00) >> 8); #endif From 30c4fd3875cedbd7db4115ed3f5683a2f7cfb0a5 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 13 Nov 2018 07:01:17 +0530 Subject: [PATCH 19/23] Fixed symbol tracking device index [ROCm/hip commit: 6b3cbc65ad0de050699e8140b2a7869e784cd519] --- projects/hip/src/hip_module.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index 1dc6701fe6..019bafbe43 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -258,12 +258,16 @@ struct Agent_global { uint32_t byte_cnt; }; -inline void track(const Agent_global& x) { +inline void track(const Agent_global& x, hsa_agent_t agent) { tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(), x.address, x.byte_cnt); - auto device = ihipGetTlsDefaultCtx()->getWriteableDevice(); - + int deviceIndex =0; + for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) { + if(g_allAgents[deviceIndex] == agent) + break; + } + auto device = ihipGetDevice(deviceIndex - 1); hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true, false); hc::am_memtracker_add(x.address, ptr_info); @@ -276,7 +280,7 @@ inline void track(const Agent_global& x) { } template > -inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t, +inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent, hsa_executable_symbol_t x, void* out) { assert(out); @@ -286,7 +290,7 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t, if (t == HSA_SYMBOL_KIND_VARIABLE) { static_cast(out)->push_back(Agent_global{name(x), address(x), size(x)}); - track(static_cast(out)->back()); + track(static_cast(out)->back(),agent); } return HSA_STATUS_SUCCESS; From 517ba1c4c18af597a9c5a3885a5e6e6de8068fd0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 16 Nov 2018 01:23:25 +0300 Subject: [PATCH 20/23] [HIPIFY][LLVMCompat] support of upcoming LLVM 8.0 + StringRef issue, which is finally moved to LLVM from Clang + Renamed getBeginLoc() and getEndLoc() resolution for Expr and TypeLoc classes + Support all the previous LLVM versions via LLVCompat [ROCm/hip commit: aad5858cb16c2dfa95557ff86fd3187dca6cbca4] --- .../hip/hipify-clang/src/HipifyAction.cpp | 10 +++--- projects/hip/hipify-clang/src/HipifyAction.h | 1 + projects/hip/hipify-clang/src/LLVMCompat.cpp | 36 +++++++++++++++++-- projects/hip/hipify-clang/src/LLVMCompat.h | 10 +++++- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/projects/hip/hipify-clang/src/HipifyAction.cpp b/projects/hip/hipify-clang/src/HipifyAction.cpp index ce185c39a8..b370df794e 100644 --- a/projects/hip/hipify-clang/src/HipifyAction.cpp +++ b/projects/hip/hipify-clang/src/HipifyAction.cpp @@ -270,14 +270,14 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc if (numArgs > 0) { OS << ", "; // Start of the first argument. - clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart(); + clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0)); // End of the last argument. - clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd(); + clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1)); OS << readSourceText(*SM, {argStart, argEnd}); } OS << ")"; - clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()}); + clang::SourceRange replacementRange = getWriteRange(*SM, {llcompat::getBeginLoc(launchKernel), llcompat::getEndLoc(launchKernel)}); clang::SourceLocation launchStart = replacementRange.getBegin(); clang::SourceLocation launchEnd = replacementRange.getEnd(); size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart); @@ -320,8 +320,8 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match } if (!typeName.empty()) { - clang::SourceLocation slStart = sharedVar->getLocStart(); - clang::SourceLocation slEnd = sharedVar->getLocEnd(); + clang::SourceLocation slStart = llcompat::getBeginLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); + clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); clang::SourceManager* SM = Result.SourceManager; size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; std::string varName = sharedVar->getNameAsString(); diff --git a/projects/hip/hipify-clang/src/HipifyAction.h b/projects/hip/hipify-clang/src/HipifyAction.h index 7b54dddf54..9d30a72592 100644 --- a/projects/hip/hipify-clang/src/HipifyAction.h +++ b/projects/hip/hipify-clang/src/HipifyAction.h @@ -9,6 +9,7 @@ #include "Statistics.h" namespace ct = clang::tooling; +using namespace llvm; /** * A FrontendAction that hipifies CUDA programs. diff --git a/projects/hip/hipify-clang/src/LLVMCompat.cpp b/projects/hip/hipify-clang/src/LLVMCompat.cpp index 4ab62310d6..611bb28cbe 100644 --- a/projects/hip/hipify-clang/src/LLVMCompat.cpp +++ b/projects/hip/hipify-clang/src/LLVMCompat.cpp @@ -8,11 +8,11 @@ void PrintStackTraceOnErrorSignal() { #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) llvm::sys::PrintStackTraceOnErrorSignal(); #else - llvm::sys::PrintStackTraceOnErrorSignal(clang::StringRef()); + llvm::sys::PrintStackTraceOnErrorSignal(StringRef()); #endif } -ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file) { +ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file) { #if LLVM_VERSION_MAJOR > 3 // getReplacements() now returns a map from filename to Replacements - so create an entry // for this source file and return a reference to it. @@ -40,4 +40,36 @@ void EnterPreprocessorTokenStream(clang::Preprocessor& _pp, const clang::Token * #endif } +clang::SourceLocation getBeginLoc(const clang::Stmt* stmt) { +#if LLVM_VERSION_MAJOR < 8 + return stmt->getLocStart(); +#else + return stmt->getBeginLoc(); +#endif +} + +clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc) { +#if LLVM_VERSION_MAJOR < 8 + return typeLoc.getLocStart(); +#else + return typeLoc.getBeginLoc(); +#endif +} + +clang::SourceLocation getEndLoc(const clang::Stmt* stmt) { +#if LLVM_VERSION_MAJOR < 8 + return stmt->getLocEnd(); +#else + return stmt->getEndLoc(); +#endif +} + +clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc) { +#if LLVM_VERSION_MAJOR < 8 + return typeLoc.getLocEnd(); +#else + return typeLoc.getEndLoc(); +#endif +} + } // namespace llcompat diff --git a/projects/hip/hipify-clang/src/LLVMCompat.h b/projects/hip/hipify-clang/src/LLVMCompat.h index 9f82e36a1f..a43af857bf 100644 --- a/projects/hip/hipify-clang/src/LLVMCompat.h +++ b/projects/hip/hipify-clang/src/LLVMCompat.h @@ -25,15 +25,23 @@ namespace llcompat { #define LLVM_DEBUG(X) DEBUG(X) #endif +clang::SourceLocation getBeginLoc(const clang::Stmt* stmt); +clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc); + +clang::SourceLocation getEndLoc(const clang::Stmt* stmt); +clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc); + void PrintStackTraceOnErrorSignal(); +using namespace llvm; + /** * Get the replacement map for a given filename in a RefactoringTool. * * Older LLVM versions don't actually support multiple filenames, so everything all gets * smushed together. It is the caller's responsibility to cope with this. */ -ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file); +ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file); /** * Add a Replacement to a Replacements. From 9d6832e1703724ec19400a112b07c27e0f121614 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 19 Nov 2018 14:31:40 +0530 Subject: [PATCH 21/23] [ci] Renable excluded tests Regressions caused by dependent components have been fixed or workaround put in place. Change-Id: I9ecaf0a4a645d9222f12d2c45291f2b23984b72b [ROCm/hip commit: 884a5f1ca7f7c529d96ba1edde9272e9e48faf96] --- projects/hip/Jenkinsfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/projects/hip/Jenkinsfile b/projects/hip/Jenkinsfile index 6d37f10e3c..e6b60f398e 100644 --- a/projects/hip/Jenkinsfile +++ b/projects/hip/Jenkinsfile @@ -167,8 +167,6 @@ def docker_build_inside_image( def build_image, String inside_args, String platf } // Cap the maximum amount of testing, in case of hangs - // Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133 - // Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1 timeout(time: 1, unit: 'HOURS') { stage("${platform} unit testing") @@ -178,7 +176,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf cd ${build_dir_rel} make install -j\$(nproc) make build_tests -i -j\$(nproc) - ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)" + ctest """ // If unit tests output a junit or xunit file in the future, jenkins can parse that file // to display test results on the dashboard From b55426217c27c2824da599445584b09a81b6c0c0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 19 Nov 2018 20:00:05 +0300 Subject: [PATCH 22/23] [HIPIFY] CUDA Driver API functions total revise (up to CUDA 10.0) + for all CUDA versions + add missing types + fix typos + sync with HIP + update CUDA_Driver_API_functions_supported_by_HIP.md + formatting, annotating [ROCm/hip commit: cfabad43540a6d04c8ce7fcd7a102edf718eaaf2] --- ...A_Driver_API_functions_supported_by_HIP.md | 43 +- ..._Runtime_API_functions_supported_by_HIP.md | 20 +- .../src/CUDA2HIP_Driver_API_functions.cpp | 1070 +++++++++++------ .../src/CUDA2HIP_Driver_API_types.cpp | 6 +- .../src/CUDA2HIP_Runtime_API_functions.cpp | 124 +- 5 files changed, 858 insertions(+), 405 deletions(-) diff --git a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 9906ca6fa6..b4f379879b 100644 --- a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -22,8 +22,8 @@ | typedef |`CUDA_RESOURCE_VIEW_DESC_st` | | | struct |`CUDA_TEXTURE_DESC` | | | typedef |`CUDA_TEXTURE_DESC_st` | | -| struct |`CUdevprop` |`hipDeviceProp_t` | -| typedef |`CUdevprop_st` |`hipDeviceProp_t` | +| struct |`CUdevprop` | | +| typedef |`CUdevprop_st` | | | struct |`CUipcEventHandle` |`ihipIpcEventHandle_t` | | typedef |`CUipcEventHandle_st` |`ihipIpcEventHandle_t` | | struct |`CUipcMemHandle` |`hipIpcMemHandle_t` | @@ -763,6 +763,7 @@ | `cuDeviceGetName` | `hipDeviceGetName` | | `cuDeviceTotalMem` | `hipDeviceTotalMem` | | `cuDeviceGetLuid` | | +| `cuDeviceGetUuid` | | ## **6. Device Management [DEPRECATED]** @@ -792,9 +793,9 @@ | `cuCtxGetCurrent` | `hipCtxGetCurrent` | | `cuCtxGetDevice` | `hipCtxGetDevice` | | `cuCtxGetFlags` | `hipCtxGetFlags` | -| `cuCtxGetLimit` | | +| `cuCtxGetLimit` | `hipDeviceGetLimit` | | `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` | -| `cuCtxGetStreamPriorityRange` | | +| `cuCtxGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange`| | `cuCtxPopCurrent` | `hipCtxPopCurrent` | | `cuCtxPushCurrent` | `hipCtxPushCurrent` | | `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` | @@ -835,16 +836,16 @@ |-----------------------------------------------------------|-------------------------------| | `cuArray3DCreate` | `hipArray3DCreate` | | `cuArray3DGetDescriptor` | | -| `cuArrayCreate` | | +| `cuArrayCreate` | `hipArrayCreate` | | `cuArrayDestroy` | | | `cuArrayGetDescriptor` | | | `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | | `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | -| `cuIpcCloseMemHandle` | | +| `cuIpcCloseMemHandle` | `hipIpcCloseMemHandle` | | `cuIpcGetEventHandle` | | -| `cuIpcGetMemHandle` | | +| `cuIpcGetMemHandle` | `hipIpcGetMemHandle` | | `cuIpcOpenEventHandle` | | -| `cuIpcOpenMemHandle` | | +| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` | | `cuMemAlloc` | `hipMalloc` | | `cuMemAllocHost` | | | `cuMemAllocManaged` | | @@ -867,7 +868,7 @@ | `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` | | `cuMemcpyDtoH` | `hipMemcpyDtoH` | | `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` | -| `cuMemcpyHtoA` | | +| `cuMemcpyHtoA` | `hipMemcpyHtoA` | | `cuMemcpyHtoAAsync` | | | `cuMemcpyHtoD` | `hipMemcpyHtoD` | | `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` | @@ -875,11 +876,11 @@ | `cuMemcpyPeerAsync` | | | `cuMemFree` | `hipFree` | | `cuMemFreeHost` | `hipFreeHost` | -| `cuMemGetAddressRange` | | +| `cuMemGetAddressRange` | `hipMemGetAddressRange` | | `cuMemGetInfo` | `hipMemGetInfo` | | `cuMemHostAlloc` | `hipHostMalloc` | -| `cuMemHostGetDevicePointer` | | -| `cuMemHostGetFlags` | | +| `cuMemHostGetDevicePointer` | `hipHostGetDevicePointer` | +| `cuMemHostGetFlags` | `hipHostGetFlags` | | `cuMemHostRegister` | `hipHostRegister` | | `cuMemHostUnregister` | `hipHostUnregister` | | `cuMemsetD16` | | @@ -892,8 +893,8 @@ | `cuMemsetD2D8Async` | | | `cuMemsetD32` | `hipMemset` | | `cuMemsetD32Async` | `hipMemsetAsync` | -| `cuMemsetD2D8` | | -| `cuMemsetD2D8Async` | | +| `cuMemsetD8` | `hipMemsetD8` | +| `cuMemsetD8Async` | | | `cuMipmappedArrayCreate` | | | `cuMipmappedArrayDestroy` | | | `cuMipmappedArrayGetLevel` | | @@ -916,8 +917,8 @@ |-----------------------------------------------------------|-------------------------------| | `cuStreamAddCallback` | `hipStreamAddCallback` | | `cuStreamAttachMemAsync` | | -| `cuStreamCreate` | | -| `cuStreamCreateWithPriority` | | +| `cuStreamCreate` | `hipStreamCreateWithFlags` | +| `cuStreamCreateWithPriority` | `hipStreamCreateWithPriority` | | `cuStreamDestroy` | `hipStreamDestroy` | | `cuStreamGetFlags` | `hipStreamGetFlags` | | `cuStreamGetPriority` | `hipStreamGetPriority` | @@ -932,7 +933,7 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| -| `cuEventCreate` | `hipEventCreate` | +| `cuEventCreate` | `hipEventCreateWithFlags` | | `cuEventDestroy` | `hipEventDestroy` | | `cuEventElapsedTime` | `hipEventElapsedTime` | | `cuEventQuery` | `hipEventQuery` | @@ -967,10 +968,13 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| | `cuFuncGetAttribute` | | +| `cuFuncSetAttribute` | | | `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` | | `cuFuncSetSharedMemConfig` | | | `cuLaunchKernel` | `hipModuleLaunchKernel` | | `cuLaunchHostFunc` | | +| `cuLaunchCooperativeKernel` | | +| `cuLaunchCooperativeKernelMultiDevice` | | ## **18. Execution Control [DEPRECATED]** @@ -1047,8 +1051,8 @@ | `cuTexRefGetMipmapLevelBias` | | | `cuTexRefGetMipmapLevelClamp` | | | `cuTexRefGetMipmappedArray` | | -| `cuTexRefSetAddress` | | -| `cuTexRefSetAddress2D` | | +| `cuTexRefSetAddress` | `hipTexRefSetAddress` | +| `cuTexRefSetAddress2D` | `hipTexRefSetAddress2D` | | `cuTexRefSetAddressMode` | `hipTexRefSetAddressMode` | | `cuTexRefSetArray` | `hipTexRefSetArray` | | `cuTexRefSetBorderColor` | | @@ -1233,3 +1237,4 @@ | `cuEGLStreamProducerReturnFrame` | | | `cuGraphicsEGLRegisterImage` | | | `cuGraphicsResourceGetMappedEglFrame` | | +| `cuEventCreateFromEGLSync` | | diff --git a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 1a55667f82..6190f6565a 100644 --- a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -11,7 +11,7 @@ | `cudaDeviceGetLimit` | `hipDeviceGetLimit` | | `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | | `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | -| `cudaDeviceGetStreamPriorityRange` | | +| `cudaDeviceGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange` | | `cudaDeviceReset` | `hipDeviceReset` | | `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | | `cudaDeviceSetLimit` | `hipDeviceSetLimit` | @@ -19,7 +19,7 @@ | `cudaDeviceSynchronize` | `hipDeviceSynchronize` | | `cudaGetDevice` | `hipGetDevice` | | `cudaGetDeviceCount` | `hipGetDeviceCount` | -| `cudaGetDeviceFlags` | | +| `cudaGetDeviceFlags` | `hipCtxGetFlags` | | `cudaGetDeviceProperties` | `hipGetDeviceProperties` | | `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` | | `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` | @@ -56,12 +56,15 @@ |-----------------------------------------------------------|-------------------------------| | `cudaStreamAddCallback` | `hipStreamAddCallback` | | `cudaStreamAttachMemAsync` | | +| `cudaStreamBeginCapture` | | +| `cudaStreamEndCapture` | | +| `cudaStreamIsCapturing` | | | `cudaStreamCreate` | `hipStreamCreate` | | `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` | -| `cudaStreamCreateWithPriority` | | +| `cudaStreamCreateWithPriority` | `hipStreamCreateWithPriority` | | `cudaStreamDestroy` | `hipStreamDestroy` | | `cudaStreamGetFlags` | `hipStreamGetFlags` | -| `cudaStreamGetPriority` | | +| `cudaStreamGetPriority` | `hipStreamGetPriority` | | `cudaStreamQuery` | `hipStreamQuery` | | `cudaStreamSynchronize` | `hipStreamSynchronize` | | `cudaStreamWaitEvent` | `hipStreamWaitEvent` | @@ -82,7 +85,14 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| - +| `cudaSignalExternalSemaphoresAsync` | | +| `cudaWaitExternalSemaphoresAsync` | | +| `cudaImportExternalMemory` | | +| `cudaExternalMemoryGetMappedBuffer` | | +| `cudaExternalMemoryGetMappedMipmappedArray` | | +| `cudaDestroyExternalMemory` | | +| `cudaImportExternalSemaphore` | | +| `cudaDestroyExternalSemaphore` | | ## **7. Execution Control** diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index d74c4d4f1a..77dd67fd03 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -1,399 +1,753 @@ #include "CUDA2HIP.h" - -// Map of all functions +// Map of all CUDA Driver API functions const std::map CUDA_DRIVER_FUNCTION_MAP{ + // 5.2. Error Handling + // no analogue + // NOTE: cudaGetErrorName and hipGetErrorName have different signature + {"cuGetErrorName", {"hipGetErrorName_", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: cudaGetErrorString and hipGetErrorString have different signature + {"cuGetErrorString", {"hipGetErrorString_", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - ///////////////////////////// CUDA DRIVER API ///////////////////////////// + // 5.3. Initialization + // no analogue + {"cuInit", {"hipInit", CONV_INIT, API_DRIVER}}, - // Error Handling - {"cuGetErrorName", {"hipGetErrorName___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, // cudaGetErrorName (hipGetErrorName) has different signature - {"cuGetErrorString", {"hipGetErrorString___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, // cudaGetErrorString (hipGetErrorString) has different signature + // 5.4 Version Management + // cudaDriverGetVersion + {"cuDriverGetVersion", {"hipDriverGetVersion", CONV_VERSION, API_DRIVER}}, - // Init - {"cuInit", {"hipInit", CONV_INIT, API_DRIVER}}, + // 5.5. Device Management + // cudaGetDevice + // NOTE: cudaGetDevice has additional attr: int ordinal + {"cuDeviceGet", {"hipGetDevice", CONV_DEVICE, API_DRIVER}}, + // cudaDeviceGetAttribute + {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_DRIVER}}, + // cudaGetDeviceCount + {"cuDeviceGetCount", {"hipGetDeviceCount", CONV_DEVICE, API_DRIVER}}, + // no analogue + {"cuDeviceGetLuid", {"hipDeviceGetLuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuDeviceGetName", {"hipDeviceGetName", CONV_DEVICE, API_DRIVER}}, + // no analogue + {"cuDeviceGetUuid", {"hipDeviceGetUuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuDeviceTotalMem", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, + {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, - // Driver - {"cuDriverGetVersion", {"hipDriverGetVersion", CONV_VERSION, API_DRIVER}}, + // 5.6. Device Management [DEPRECATED] + {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", CONV_DEVICE, API_DRIVER}}, + {"cuDeviceGetProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_DRIVER}}, - // Context Management - {"cuCtxCreate_v2", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy_v2", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCurrent", {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetDevice", {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetFlags", {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetLimit", {"hipCtxGetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetStreamPriorityRange", {"hipCtxGetStreamPriorityRange", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCurrent", {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetLimit", {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSynchronize", {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER}}, - // Context Management [DEPRECATED] - {"cuCtxAttach", {"hipCtxAttach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxDetach", {"hipCtxDetach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.7. Primary Context Management + // no analogues + {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", CONV_CONTEXT, API_DRIVER}}, - // Primary Context Management - {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", CONV_CONTEXT, API_DRIVER}}, + // 5.8. Context Management + // no analogues, except a few + {"cuCtxCreate", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxCreate_v2", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxDestroy", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxDestroy_v2", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetCurrent", {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetDevice", {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER}}, + // cudaGetDeviceFlags + // TODO: rename to hipGetDeviceFlags + {"cuCtxGetFlags", {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER}}, + // cudaDeviceGetLimit + {"cuCtxGetLimit", {"hipDeviceGetLimit", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, + // cudaDeviceGetStreamPriorityRange + {"cuCtxGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPopCurrent", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPushCurrent", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetCurrent", {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetLimit", {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSynchronize", {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER}}, - // 1. Device Management - {"cuDeviceGet", {"hipGetDevice", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetName", {"hipDeviceGetName", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetCount", {"hipGetDeviceCount", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetLuid", {"hipDeviceGetLuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.9. Context Management [DEPRECATED] + // no analogues + {"cuCtxAttach", {"hipCtxAttach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuCtxDetach", {"hipCtxDetach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - // 12. Peer Context Memory Access - {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", CONV_PEER, API_DRIVER}}, - {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", CONV_PEER, API_DRIVER}}, - {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", CONV_PEER, API_DRIVER}}, - {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaDeviceGetP2PAttribute) + // 5.10. Module Management + // no analogues + {"cuLinkAddData", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddData_v2", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddFile", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddFile_v2", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkComplete", {"hipLinkComplete", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkCreate", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkCreate_v2", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkDestroy", {"hipLinkDestroy", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleGetFunction", {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetGlobal", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleGetTexRef", {"hipModuleGetTexRef", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoad", {"hipModuleLoad", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadData", {"hipModuleLoadData", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleUnload", {"hipModuleUnload", CONV_MODULE, API_DRIVER}}, - // Device Management [DEPRECATED] - {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_DRIVER}}, + // 5.11. Memory Management + // no analogue + {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArray3DCreate_v2", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArray3DGetDescriptor_v2", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayCreate", {"hipArrayCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArrayCreate_v2", {"hipArrayCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArrayDestroy", {"hipArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayGetDescriptor_v2", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDeviceGetByPCIBusId + {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_MEMORY, API_DRIVER}}, + // cudaDeviceGetPCIBusId + {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_MEMORY, API_DRIVER}}, + // cudaIpcCloseMemHandle + {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaIpcGetEventHandle + {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaIpcGetMemHandle + {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaIpcOpenEventHandle + {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaIpcOpenMemHandle + {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaMalloc + {"cuMemAlloc", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, + {"cuMemAlloc_v2", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, + // cudaHostAlloc + {"cuMemAllocHost", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocHost_v2", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemAllocManaged", {"hipMemAllocManaged", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemAllocPitch", {"hipMemAllocPitch", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocPitch_v2", {"hipMemAllocPitch", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy due to different signatures + {"cuMemcpy", {"hipMemcpy_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy2D due to different signatures + {"cuMemcpy2D", {"hipMemcpy2D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2D_v2", {"hipMemcpy2D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy2DAsync due to different signatures + {"cuMemcpy2DAsync", {"hipMemcpy2DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2DAsync_v2", {"hipMemcpy2DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2DUnaligned_v2", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3D due to different signatures + {"cuMemcpy3D", {"hipMemcpy3D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy3D_v2", {"hipMemcpy3D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DAsync due to different signatures + {"cuMemcpy3DAsync", {"hipMemcpy3DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy3DAsync_v2", {"hipMemcpy3DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DPeer due to different signatures + {"cuMemcpy3DPeer", {"hipMemcpy3DPeer_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DPeerAsync due to different signatures + {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyAsync due to different signatures + {"cuMemcpyAsync", {"hipMemcpyAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyArrayToArray due to different signatures + {"cuMemcpyAtoA", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoD", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoH", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoH_v2", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyDtoA", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyDtoD", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoDAsync", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoH", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoHAsync", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoA", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoA_v2", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyHtoD", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoDAsync", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + // NOTE: Not equal to cudaMemcpyPeer due to different signatures + {"cuMemcpyPeer", {"hipMemcpyPeer_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyPeerAsync due to different signatures + {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaFree + {"cuMemFree", {"hipFree", CONV_MEMORY, API_DRIVER}}, + {"cuMemFree_v2", {"hipFree", CONV_MEMORY, API_DRIVER}}, + // cudaFreeHost + {"cuMemFreeHost", {"hipHostFree", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemGetAddressRange", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER}}, + {"cuMemGetAddressRange_v2", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER}}, + // cudaMemGetInfo + {"cuMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, + {"cuMemGetInfo_v2", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, + // cudaHostAlloc + {"cuMemHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_DRIVER}}, + // cudaHostGetDevicePointer + {"cuMemHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_DRIVER}}, + {"cuMemHostGetDevicePointer_v2", {"hipHostGetDevicePointer", CONV_MEMORY, API_DRIVER}}, + // cudaHostGetFlags + {"cuMemHostGetFlags", {"hipMemHostGetFlags", CONV_MEMORY, API_DRIVER}}, + // cudaHostRegister + {"cuMemHostRegister", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, + {"cuMemHostRegister_v2", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, + // cudaHostUnregister + {"cuMemHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD16", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD16_v2", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD16Async", {"hipMemsetD16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D16", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D16_v2", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D32", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D32_v2", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D8", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D8_v2", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemset + {"cuMemsetD32", {"hipMemset", CONV_MEMORY, API_DRIVER}}, + {"cuMemsetD32_v2", {"hipMemset", CONV_MEMORY, API_DRIVER}}, + // cudaMemsetAsync + {"cuMemsetD32Async", {"hipMemsetAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD8", {"hipMemsetD8", CONV_MEMORY, API_DRIVER}}, + {"cuMemsetD8_v2", {"hipMemsetD8", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD8Async", {"hipMemsetD8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures + {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures + {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures + {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // Module Management - {"cuLinkAddData", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkComplete", {"hipLinkComplete", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkDestroy", {"hipLinkDestroy", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetFunction", {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetTexRef", {"hipModuleGetTexRef", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoad", {"hipModuleLoad", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadData", {"hipModuleLoadData", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleUnload", {"hipModuleUnload", CONV_MODULE, API_DRIVER}}, + // 5.12. Unified Addressing + // cudaMemAdvise + {"cuMemAdvise", {"hipMemAdvise", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // TODO: double check cudaMemPrefetchAsync + {"cuMemPrefetchAsync", {"hipMemPrefetchAsync_", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemRangeGetAttribute + {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemRangeGetAttributes + {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuPointerGetAttribute", {"hipPointerGetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaPointerGetAttributes due to different signatures + {"cuPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuPointerSetAttribute", {"hipPointerSetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // Event functions - {"cuEventCreate", {"hipEventCreate", CONV_EVENT, API_DRIVER}}, - {"cuEventDestroy_v2", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, - {"cuEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_DRIVER}}, - {"cuEventQuery", {"hipEventQuery", CONV_EVENT, API_DRIVER}}, - {"cuEventRecord", {"hipEventRecord", CONV_EVENT, API_DRIVER}}, - {"cuEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_DRIVER}}, + // 5.13. Stream Management + // cudaStreamAddCallback + {"cuStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}}, + // cudaStreamAttachMemAsync + {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamBeginCapture + {"cuStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamCreateWithFlags + {"cuStreamCreate", {"hipStreamCreateWithFlags", CONV_STREAM, API_DRIVER}}, + // cudaStreamCreateWithPriority + {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_DRIVER}}, + // cudaStreamDestroy + {"cuStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, + {"cuStreamDestroy_v2", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, + // cudaStreamEndCapture + {"cuStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuStreamGetCtx", {"hipStreamGetContext", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamGetFlags + {"cuStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_DRIVER}}, + // cudaStreamGetPriority + {"cuStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_DRIVER}}, + // cudaStreamIsCapturing + {"cuStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamQuery + {"cuStreamQuery", {"hipStreamQuery", CONV_STREAM, API_DRIVER}}, + // cudaStreamSynchronize + {"cuStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_DRIVER}}, + // cudaStreamWaitEvent + {"cuStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER}}, - // External Resource Interoperability - {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.14. Event Management + // cudaEventCreateWithFlags + {"cuEventCreate", {"hipEventCreateWithFlags", CONV_EVENT, API_DRIVER}}, + // cudaEventDestroy + {"cuEventDestroy", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, + {"cuEventDestroy_v2", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, + // cudaEventElapsedTime + {"cuEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_DRIVER}}, + // cudaEventQuery + {"cuEventQuery", {"hipEventQuery", CONV_EVENT, API_DRIVER}}, + // cudaEventRecord + {"cuEventRecord", {"hipEventRecord", CONV_EVENT, API_DRIVER}}, + // cudaEventSynchronize + {"cuEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_DRIVER}}, - // Execution Control - {"cuFuncGetAttribute", {"hipFuncGetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", CONV_EXECUTION, API_DRIVER}}, - {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunchKernel", {"hipModuleLaunchKernel", CONV_EXECUTION, API_DRIVER}}, - {"cuLaunchHostFunc", {"hipLaunchHostFunc", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.15. External Resource Interoperability + // cudaDestroyExternalMemory + {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroyExternalSemaphore + {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaExternalMemoryGetMappedBuffer + {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaExternalMemoryGetMappedMipmappedArray + {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaImportExternalMemory + {"cuImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaImportExternalSemaphore + {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaSignalExternalSemaphoresAsync + {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaWaitExternalSemaphoresAsync + {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // Execution Control [DEPRECATED] - {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunch", {"hipLaunch", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaLaunch) - {"cuLaunchGrid", {"hipLaunchGrid", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunchGridAsync", {"hipLaunchGridAsync", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetf", {"hipParamSetf", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSeti", {"hipParamSeti", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetSize", {"hipParamSetSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetSize", {"hipParamSetSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetv", {"hipParamSetv", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.16. Stream Memory Operations + // no analogues + {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWaitValue32", {"hipStreamWaitValue32", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWaitValue64", {"hipStreamWaitValue64", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWriteValue32", {"hipStreamWriteValue32", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWriteValue64", {"hipStreamWriteValue64", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // Graph Management - {"cuGraphCreate", {"hipGraphCreate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphLaunch", {"hipGraphLaunch", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddHostNode", {"hipGraphAddHostNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphClone", {"hipGraphClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetType", {"hipGraphNodeGetType", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetNodes", {"hipGraphGetNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetEdges", {"hipGraphGetEdges", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddDependencies", {"hipGraphAddDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphDestroyNode", {"hipGraphDestroyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphInstantiate", {"hipGraphInstantiate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphExecDestroy", {"hipGraphExecDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphDestroy", {"hipGraphDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.17.Execution Control + // no analogue + {"cuFuncGetAttribute", {"hipFuncGetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetAttribute due to different signatures + {"cuFuncSetAttribute", {"hipFuncSetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures + {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures + {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchCooperativeKernel due to different signatures + {"cuLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchCooperativeKernelMultiDevice due to different signatures + {"cuLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaLaunchHostFunc + {"cuLaunchHostFunc", {"hipLaunchHostFunc", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchKernel due to different signatures + {"cuLaunchKernel", {"hipModuleLaunchKernel", CONV_EXECUTION, API_DRIVER}}, - // Occupancy - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessor) - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) - {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSize) - {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSizeWithFlags) + // 5.18.Execution Control [DEPRECATED] + // no analogue + {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunch due to different signatures + {"cuLaunch", {"hipLaunch", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuLaunchGrid", {"hipLaunchGrid", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuLaunchGridAsync", {"hipLaunchGridAsync", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetf", {"hipParamSetf", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSeti", {"hipParamSeti", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetSize", {"hipParamSetSize", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetTexRef", {"hipParamSetTexRef", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetv", {"hipParamSetv", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // Streams - {"cuStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}}, - {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamCreate", {"hipStreamCreate__", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaStreamCreate due to different signatures - {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamDestroy_v2", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, - {"cuStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_DRIVER}}, - {"cuStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamQuery", {"hipStreamQuery", CONV_STREAM, API_DRIVER}}, - {"cuStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_DRIVER}}, - {"cuStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER}}, - {"cuStreamWaitValue32", {"hipStreamWaitValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWaitValue64", {"hipStreamWaitValue64", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWriteValue32", {"hipStreamWriteValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWriteValue64", {"hipStreamWriteValue64", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.19. Graph Management + // cudaGraphAddChildGraphNode + {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddDependencies + {"cuGraphAddDependencies", {"hipGraphAddDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddEmptyNode + {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddHostNode + {"cuGraphAddHostNode", {"hipGraphAddHostNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddKernelNode + {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddMemcpyNode + {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddMemsetNode + {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphChildGraphNodeGetGraph + {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphClone + {"cuGraphClone", {"hipGraphClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphCreate + {"cuGraphCreate", {"hipGraphCreate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphDestroy + {"cuGraphDestroy", {"hipGraphDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphDestroyNode + {"cuGraphDestroyNode", {"hipGraphDestroyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphExecDestroy + {"cuGraphExecDestroy", {"hipGraphExecDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetEdges + {"cuGraphGetEdges", {"hipGraphGetEdges", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetNodes + {"cuGraphGetNodes", {"hipGraphGetNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetRootNodes + {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphHostNodeGetParams + {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphHostNodeSetParams + {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphInstantiate + {"cuGraphInstantiate", {"hipGraphInstantiate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphKernelNodeGetParams + {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphKernelNodeSetParams + {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphLaunch + {"cuGraphLaunch", {"hipGraphLaunch", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemcpyNodeGetParams + {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemcpyNodeSetParams + {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemsetNodeGetParams + {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemsetNodeSetParams + {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeFindInClone + {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetDependencies + {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetDependentNodes + {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetType + {"cuGraphNodeGetType", {"hipGraphNodeGetType", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphRemoveDependencies + {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // Memory management - {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayCreate", {"hipArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayDestroy", {"hipArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAlloc_v2", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocHost", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocManaged", {"hipMemAllocManaged", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocPitch", {"hipMemAllocPitch__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemAllocPitch due to different signatures - {"cuMemcpy", {"hipMemcpy__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy due to different signatures - {"cuMemcpy2D", {"hipMemcpy2D__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy2D due to different signatures - {"cuMemcpy2DAsync", {"hipMemcpy2DAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy2DAsync due to different signatures - {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3D", {"hipMemcpy3D__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3D due to different signatures - {"cuMemcpy3DAsync", {"hipMemcpy3DAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DAsync due to different signatures - {"cuMemcpy3DPeer", {"hipMemcpy3DPeer__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DPeer due to different signatures - {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DPeerAsync due to different signatures - {"cuMemcpyAsync", {"hipMemcpyAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyAsync due to different signatures - {"cuMemcpyAtoA", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoD", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoH", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoA", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoA", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyPeerAsync due to different signatures - {"cuMemcpyPeer", {"hipMemcpyPeer__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyPeer due to different signatures - {"cuMemFree_v2", {"hipFree", CONV_MEMORY, API_DRIVER}}, - {"cuMemFreeHost", {"hipHostFree", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetAddressRange", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetInfo_v2", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, - {"cuMemHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostAlloc) - {"cuMemHostGetDevicePointer", {"hipMemHostGetDevicePointer", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemHostGetFlags", {"hipMemHostGetFlags", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemHostRegister_v2", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostAlloc) - {"cuMemHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostUnregister) - {"cuMemsetD16_v2", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD16Async", {"hipMemsetD16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16_v2", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32_v2", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8_v2", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD32_v2", {"hipMemset", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaMemset) - {"cuMemsetD32Async", {"hipMemsetAsync", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaMemsetAsync) - {"cuMemsetD8_v2", {"hipMemsetD8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD8Async", {"hipMemsetD8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.20. Occupancy + // cudaOccupancyMaxActiveBlocksPerMultiprocessor + {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}}, + // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaOccupancyMaxPotentialBlockSize + {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}}, + // cudaOccupancyMaxPotentialBlockSizeWithFlags + {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, - // Unified Addressing - {"cuMemPrefetchAsync", {"hipMemPrefetchAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE (cudaMemPrefetchAsync has different signature) - {"cuMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemAdvise) - {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemRangeGetAttribute) - {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemRangeGetAttributes) - {"cuPointerGetAttribute", {"hipPointerGetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuPointerSetAttribute", {"hipPointerSetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.21. Texture Reference Management + // no analogues + {"cuTexRefGetAddress", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetAddress_v2", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetArray", {"hipTexRefGetArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFlags", {"hipTexRefGetFlags", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFormat", {"hipTexRefGetFormat", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetAddress", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress_v2", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D_v2", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D_v3", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetArray", {"hipTexRefSetArray", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetBorderColor", {"hipTexRefSetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetFlags", {"hipTexRefSetFlags", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetFormat", {"hipTexRefSetFormat", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Reference Mngmnt + // 5.22. Texture Reference Management [DEPRECATED] + // no analogues + {"cuTexRefCreate", {"hipTexRefCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefDestroy", {"hipTexRefDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetAddress", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetArray", {"hipTexRefGetArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE - {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFlags", {"hipTexRefGetFlags", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFormat", {"hipTexRefGetFormat", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetArray", {"hipTexRefSetArray", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetBorderColor", {"hipTexRefSetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE - {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFlags", {"hipTexRefSetFlags", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFormat", {"hipTexRefSetFormat", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.23. Surface Reference Management + // no analogues + {"cuSurfRefGetArray", {"hipSurfRefGetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuSurfRefSetArray", {"hipSurfRefSetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Reference Mngmnt [DEPRECATED] - {"cuTexRefCreate", {"hipTexRefCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefDestroy", {"hipTexRefDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.24. Texture Object Management + // no analogue + // NOTE: Not equal to cudaCreateTextureObject due to different signatures + {"cuTexObjectCreate", {"hipTexObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroyTextureObject + {"cuTexObjectDestroy", {"hipTexObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetTextureObjectResourceDesc due to different signatures + {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGetTextureObjectResourceViewDesc + {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures + {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Surface Reference Mngmnt - {"cuSurfRefGetArray", {"hipSurfRefGetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfRefSetArray", {"hipSurfRefSetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.25. Surface Object Management + // no analogue + // NOTE: Not equal to cudaCreateSurfaceObject due to different signatures + {"cuSurfObjectCreate", {"hipSurfObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroySurfaceObject + {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetSurfaceObjectResourceDesc due to different signatures + {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Object Mngmnt - {"cuTexObjectCreate", {"hipTexObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectDestroy", {"hipTexObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.26. Peer Context Memory Access + // no analogue + // NOTE: Not equal to cudaDeviceEnablePeerAccess due to different signatures + {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", CONV_PEER, API_DRIVER}}, + // no analogue + // NOTE: Not equal to cudaDeviceDisablePeerAccess due to different signatures + {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", CONV_PEER, API_DRIVER}}, + // cudaDeviceCanAccessPeer + {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", CONV_PEER, API_DRIVER}}, + // cudaDeviceGetP2PAttribute + {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, - // Surface Object Mngmnt - {"cuSurfObjectCreate", {"hipSurfObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.27. Graphics Interoperability + // cudaGraphicsMapResources + {"cuGraphicsMapResources", {"hipGraphicsMapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedMipmappedArray + {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedPointer + {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedPointer + {"cuGraphicsResourceGetMappedPointer_v2", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceSetMapFlags + {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceSetMapFlags + {"cuGraphicsResourceSetMapFlags_v2", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsSubResourceGetMappedArray + {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsUnmapResources + {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsUnregisterResource + {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // Graphics Interoperability - {"cuGraphicsMapResources", {"hipGraphicsMapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsMapResources) - {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedMipmappedArray) - {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedPointer) - {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceSetMapFlags) - {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsSubResourceGetMappedArray) - {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsUnmapResources) - {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsUnregisterResource) + // 5.28. Profiler Control + // cudaProfilerInitialize + {"cuProfilerInitialize", {"hipProfilerInitialize", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaProfilerStart + {"cuProfilerStart", {"hipProfilerStart", CONV_PROFILER, API_DRIVER}}, + // cudaProfilerStop + {"cuProfilerStop", {"hipProfilerStop", CONV_PROFILER, API_DRIVER}}, - // Profiler - {"cuProfilerInitialize", {"hipProfilerInitialize", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaProfilerInitialize) - {"cuProfilerStart", {"hipProfilerStart", CONV_PROFILER, API_DRIVER}}, // API_Runtime ANALOGUE (cudaProfilerStart) - {"cuProfilerStop", {"hipProfilerStop", CONV_PROFILER, API_DRIVER}}, // API_Runtime ANALOGUE (cudaProfilerStop) + // 5.29. OpenGL Interoperability + // cudaGLGetDevices + {"cuGLGetDevices", {"hipGLGetDevices", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsGLRegisterBuffer + {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsGLRegisterImage + {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaWGLGetDevice + {"cuWGLGetDevice", {"hipWGLGetDevice", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGLGetDevices", {"hipGLGetDevices", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLGetDevices) - {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsGLRegisterBuffer) - {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsGLRegisterImage) - {"cuWGLGetDevice", {"hipWGLGetDevice", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaWGLGetDevice) + // 5.29. OpenGL Interoperability [DEPRECATED] + // no analogue + {"cuGLCtxCreate", {"hipGLCtxCreate", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuGLInit", {"hipGLInit", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // NOTE: Not equal to cudaGLMapBufferObject due to different signatures + {"cuGLMapBufferObject", {"hipGLMapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // NOTE: Not equal to cudaGLMapBufferObjectAsync due to different signatures + {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLRegisterBufferObject + {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLSetBufferObjectMapFlags + {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnmapBufferObject + {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnmapBufferObjectAsync + {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnregisterBufferObject + {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGLCtxCreate", {"hipGLCtxCreate", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuGLInit", {"hipGLInit", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuGLMapBufferObject", {"hipGLMapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaGLMapBufferObject due to different signatures - {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaGLMapBufferObjectAsync due to different signatures - {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLRegisterBufferObject) - {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLSetBufferObjectMapFlags) - {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnmapBufferObject) - {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnmapBufferObjectAsync) - {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnregisterBufferObject) + // 5.30.Direct3D 9 Interoperability + // no analogue + {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDevice + {"cuD3D9GetDevice", {"hipD3D9GetDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDevices + {"cuD3D9GetDevices", {"hipD3D9GetDevices", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDirect3DDevice + {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D9RegisterResource + {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D9GetDevice", {"hipD3D9GetDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDevice) - {"cuD3D9GetDevices", {"hipD3D9GetDevices", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDevices) - {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDirect3DDevice) - {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D9RegisterResource) + // 5.30.Direct3D 9 Interoperability [DEPRECATED] + // cudaD3D9MapResources + {"cuD3D9MapResources", {"hipD3D9MapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9RegisterResource + {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedArray + {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedPitch + {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedPointer + {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedSize + {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetSurfaceDimensions + {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceSetMapFlags + {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9UnmapResources + {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9UnregisterResource + {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuD3D9MapResources", {"hipD3D9MapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9MapResources) - {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9RegisterResource) - {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedArray) - {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPitch) - {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPointer) - {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedSize) - {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetSurfaceDimensions) - {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceSetMapFlags) - {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9UnmapResources) - {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9UnregisterResource) + // 5.31. Direct3D 10 Interoperability + // cudaD3D10GetDevice + {"cuD3D10GetDevice", {"hipD3D10GetDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10GetDevices + {"cuD3D10GetDevices", {"hipD3D10GetDevices", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D10RegisterResource + {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 10 Interoperability - {"cuD3D10GetDevice", {"hipD3D10GetDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDevice) - {"cuD3D10GetDevices", {"hipD3D10GetDevices", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDevices) - {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D10RegisterResource) + // 5.31. Direct3D 10 Interoperability [DEPRECATED] + // no analogue + {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10GetDirect3DDevice + {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10MapResources + {"cuD3D10MapResources", {"hipD3D10MapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10RegisterResource + {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedArray + {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedPitch + {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedPointer + {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedSize + {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetSurfaceDimensions + {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceSetMapFlags + {"cuD310ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10UnmapResources + {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10UnregisterResource + {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 10 Interoperability [DEPRECATED] - {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDirect3DDevice) - {"cuD3D10MapResources", {"hipD3D10MapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10MapResources) - {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10RegisterResource) - {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedArray) - {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPitch) - {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPointer) - {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedSize) - {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetSurfaceDimensions) - {"cuD310ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceSetMapFlags) - {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10UnmapResources) - {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10UnregisterResource) + // 5.32. Direct3D 11 Interoperability + // cudaD3D11GetDevice + {"cuD3D11GetDevice", {"hipD3D11GetDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D11GetDevices + {"cuD3D11GetDevices", {"hipD3D11GetDevices", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D11RegisterResource + {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 11 Interoperability - {"cuD3D11GetDevice", {"hipD3D11GetDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDevice) - {"cuD3D11GetDevices", {"hipD3D11GetDevices", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDevices) - {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D11RegisterResource) + // 5.32. Direct3D 11 Interoperability [DEPRECATED] + // no analogue + {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D11GetDirect3DDevice + {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 11 Interoperability [DEPRECATED] - {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDirect3DDevice) - - // VDPAU Interoperability - {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterOutputSurface) - {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterVideoSurface) - {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaVDPAUGetDevice) - {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - - // EGL Interoperability - {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerAcquireFrame) - {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnect) - {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnectWithFlags) - {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerReleaseFrame) - {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerConnect) - {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerDisconnect) - {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerPresentFrame) - {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerReturnFrame) - {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsEGLRegisterImage) - {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedEglFrame) + // 5.33. VDPAU Interoperability + // cudaGraphicsVDPAURegisterOutputSurface + {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsVDPAURegisterVideoSurface + {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaVDPAUGetDevice + {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.34. EGL Interoperability + // cudaEGLStreamConsumerAcquireFrame + {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerConnect + {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerConnectWithFlags + {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerDisconnect + {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerReleaseFrame + {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerConnect + {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerDisconnect + {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerPresentFrame + {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerReturnFrame + {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsEGLRegisterImage + {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedEglFrame + {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEventCreateFromEGLSync + {"cuEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, ////////////////////////////// cuComplex API ////////////////////////////// {"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}}, @@ -423,4 +777,4 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}}, {"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}}, {"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}}, -}; \ No newline at end of file +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp index 07b78ac738..5438aab3de 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp @@ -80,8 +80,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP{ {"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}}, - {"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}}, + // no analogue + // NOTE: cudaDeviceProp differs + {"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, + {"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // cudaIpcEventHandle_st {"CUipcEventHandle_st", {"ihipIpcEventHandle_t", CONV_TYPE, API_DRIVER}}, diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp index 6c81de2817..1eaa0903e1 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp @@ -1,6 +1,6 @@ #include "CUDA2HIP.h" -// Map of all functions +// Map of all CUDA Runtime API functions const std::map CUDA_RUNTIME_FUNCTION_MAP{ // Error API {"cudaGetLastError", {"hipGetLastError", CONV_ERROR, API_RUNTIME}}, @@ -9,29 +9,49 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaGetErrorString", {"hipGetErrorString", CONV_ERROR, API_RUNTIME}}, // memcpy functions + // no analogue + // NOTE: Not equal to cuMemcpy due to different signatures {"cudaMemcpy", {"hipMemcpy", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToArray", {"hipMemcpyToArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", CONV_MEMORY, API_RUNTIME}}, + {"cudaMemcpyAsync", {"hipMemcpyAsync", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy2D due to different signatures {"cudaMemcpy2D", {"hipMemcpy2D", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy2DAsync due to different signatures {"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3D due to different signatures {"cudaMemcpy3D", {"hipMemcpy3D", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DAsync due to different signatures {"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DPeer due to different signatures {"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures {"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpyAtoA due to different signatures {"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", CONV_MEMORY, API_RUNTIME}}, - {"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // - {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // - {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // + // cuMemAdvise + {"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuMemRangeGetAttribute + {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuMemRangeGetAttributes + {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // memset {"cudaMemset", {"hipMemset", CONV_MEMORY, API_RUNTIME}}, @@ -42,13 +62,17 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaMemset3DAsync", {"hipMemset3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // Memory management + // cuMemGetInfo {"cudaMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_RUNTIME}}, {"cudaArrayGetInfo", {"hipArrayGetInfo", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures {"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // // API_Driver ANALOGUE (cuMemPrefetchAsync) + // TODO: double check cuMemPrefetchAsync + {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // malloc {"cudaMalloc", {"hipMalloc", CONV_MEMORY, API_RUNTIME}}, @@ -57,15 +81,22 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaMalloc3D", {"hipMalloc3D", CONV_MEMORY, API_RUNTIME}}, {"cudaMalloc3DArray", {"hipMalloc3DArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMallocManaged", {"hipMallocManaged", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMipmappedArrayCreate due to different signatures {"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMallocPitch", {"hipMallocPitch", CONV_MEMORY, API_RUNTIME}}, + // cuMemFree {"cudaFree", {"hipFree", CONV_MEMORY, API_RUNTIME}}, + // cuMemFreeHost {"cudaFreeHost", {"hipHostFree", CONV_MEMORY, API_RUNTIME}}, {"cudaFreeArray", {"hipFreeArray", CONV_MEMORY, API_RUNTIME}}, + // cuMemHostRegister {"cudaHostRegister", {"hipHostRegister", CONV_MEMORY, API_RUNTIME}}, + // cuMemHostUnregister {"cudaHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_RUNTIME}}, - // hipHostAlloc deprecated - use hipHostMalloc instead + // cuMemHostAlloc + // NOTE: hipHostAlloc deprecated - use hipHostMalloc instead {"cudaHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_RUNTIME}}, // make memory functions @@ -74,35 +105,81 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"make_cudaPos", {"make_hipPos", CONV_MEMORY, API_RUNTIME}}, // Host Register Flags + // cuMemHostGetFlags {"cudaHostGetFlags", {"hipHostGetFlags", CONV_MEMORY, API_RUNTIME}}, // Events - {"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}}, - {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}}, - {"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}}, - {"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}}, - {"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}}, - {"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}}, - {"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuEventCreate due to different signatures + {"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}}, + // cuEventCreate + {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}}, + // cuEventDestroy + {"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}}, + // cuEventRecord + {"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}}, + // cuEventElapsedTime + {"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}}, + // cuEventSynchronize + {"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}}, + // cuEventQuery + {"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}}, + + // 5.6. External Resource Interoperability + // cuDestroyExternalMemory + {"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuDestroyExternalSemaphore + {"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuExternalMemoryGetMappedBuffer + {"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuExternalMemoryGetMappedMipmappedArray + {"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuImportExternalMemory + {"cudaImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuImportExternalSemaphore + {"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuSignalExternalSemaphoresAsync + {"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuWaitExternalSemaphoresAsync + {"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, // Streams + // no analogue + // NOTE: Not equal to cuStreamCreate due to different signatures {"cudaStreamCreate", {"hipStreamCreate", CONV_STREAM, API_RUNTIME}}, + // cuStreamCreate {"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", CONV_STREAM, API_RUNTIME}}, - {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamCreateWithPriority + {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME}}, + // cuStreamDestroy {"cudaStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}}, + // cuStreamWaitEvent {"cudaStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}}, + // cuStreamSynchronize {"cudaStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME}}, + // cuStreamGetFlags {"cudaStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}}, + // cuStreamQuery {"cudaStreamQuery", {"hipStreamQuery", CONV_STREAM, API_RUNTIME}}, + // cuStreamAddCallback {"cudaStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_RUNTIME}}, + // cuStreamAttachMemAsync {"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamBeginCapture + {"cudaStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamEndCapture + {"cudaStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamIsCapturing + {"cudaStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamGetPriority + {"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME}}, // Other synchronization {"cudaDeviceSynchronize", {"hipDeviceSynchronize", CONV_DEVICE, API_RUNTIME}}, {"cudaDeviceReset", {"hipDeviceReset", CONV_DEVICE, API_RUNTIME}}, {"cudaSetDevice", {"hipSetDevice", CONV_DEVICE, API_RUNTIME}}, {"cudaGetDevice", {"hipGetDevice", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetCount {"cudaGetDeviceCount", {"hipGetDeviceCount", CONV_DEVICE, API_RUNTIME}}, {"cudaChooseDevice", {"hipChooseDevice", CONV_DEVICE, API_RUNTIME}}, @@ -118,20 +195,25 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_RUNTIME}}, // Pointer Attributes - // struct cudaPointerAttributes - {"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_RUNTIME}}, - + // no analogue + // NOTE: Not equal to cuPointerGetAttributes due to different signatures + {"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_RUNTIME}}, + // cuMemHostGetDevicePointer {"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_RUNTIME}}, // Device {"cudaGetDeviceProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetPCIBusId {"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetByPCIBusId {"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_RUNTIME}}, - {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuCtxGetStreamPriorityRange + {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME}}, {"cudaSetValidDevices", {"hipSetValidDevices", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, // Device Flags - {"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuCtxGetFlags + {"cudaGetDeviceFlags", {"hipCtxGetFlags", CONV_DEVICE, API_RUNTIME}}, {"cudaSetDeviceFlags", {"hipSetDeviceFlags", CONV_DEVICE, API_RUNTIME}}, // Cache config @@ -179,7 +261,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ // {"cudaThreadGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}}, // {"cudaThreadSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}}, - + // cuCtxGetLimit {"cudaDeviceGetLimit", {"hipDeviceGetLimit", CONV_DEVICE, API_RUNTIME}}, // Profiler From 223ab79e90664e0e3ffb2779ca5c23584dd3d108 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 19 Nov 2018 21:04:47 +0300 Subject: [PATCH 23/23] [HIPIFY] Move Complex API types and functions to separate files [ROCm/hip commit: 8aefe12b8e8458f22e3f6c41775643df102b37af] --- projects/hip/hipify-clang/src/CUDA2HIP.cpp | 2 ++ projects/hip/hipify-clang/src/CUDA2HIP.h | 4 +++ .../src/CUDA2HIP_Complex_API_functions.cpp | 28 ++++++++++++++++++ .../src/CUDA2HIP_Complex_API_types.cpp | 8 +++++ .../src/CUDA2HIP_Driver_API_functions.cpp | 29 ------------------- 5 files changed, 42 insertions(+), 29 deletions(-) create mode 100644 projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp create mode 100644 projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.cpp b/projects/hip/hipify-clang/src/CUDA2HIP.cpp index 02f3ae0f12..c0879c1f98 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP.cpp @@ -51,6 +51,8 @@ const std::map& CUDA_RENAMES_MAP() { ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end()); ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end()); ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end()); + ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end()); + ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end()); ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end()); ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end()); ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end()); diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.h b/projects/hip/hipify-clang/src/CUDA2HIP.h index 9593c216a4..5c3a6fa246 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP.h +++ b/projects/hip/hipify-clang/src/CUDA2HIP.h @@ -15,6 +15,10 @@ extern const std::map CUDA_DRIVER_TYPE_NAME_MAP; extern const std::map CUDA_DRIVER_FUNCTION_MAP; // Maps the names of CUDA RUNTIME API types to the corresponding HIP types extern const std::map CUDA_RUNTIME_TYPE_NAME_MAP; +// Maps the names of CUDA Complex API types to the corresponding HIP types +extern const std::map CUDA_COMPLEX_TYPE_NAME_MAP; +// Maps the names of CUDA Complex API functions to the corresponding HIP functions +extern const std::map CUDA_COMPLEX_FUNCTION_MAP; // Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions extern const std::map CUDA_RUNTIME_FUNCTION_MAP; // Maps the names of CUDA BLAS API types to the corresponding HIP types diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp new file mode 100644 index 0000000000..3bc7c4f0a0 --- /dev/null +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp @@ -0,0 +1,28 @@ +#include "CUDA2HIP.h" + +// Maps the names of CUDA DRIVER API types to the corresponding HIP types +const std::map CUDA_COMPLEX_FUNCTION_MAP{ + {"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}}, + {"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}}, + {"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}}, + {"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}}, + {"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}}, + {"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}}, + {"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}}, + {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}}, + {"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}}, +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp new file mode 100644 index 0000000000..f371cf3b9a --- /dev/null +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp @@ -0,0 +1,8 @@ +#include "CUDA2HIP.h" + +// Maps the names of CUDA DRIVER API types to the corresponding HIP types +const std::map CUDA_COMPLEX_TYPE_NAME_MAP{ + {"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}}, + {"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}}, + {"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}}, +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index 77dd67fd03..6871be877b 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -748,33 +748,4 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // cudaEventCreateFromEGLSync {"cuEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - -////////////////////////////// cuComplex API ////////////////////////////// - {"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}}, - {"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}}, - {"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}}, - - {"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}}, - {"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}}, };