4e1f6f2a0e
Change-Id: I3094c15008093f2072bcd38aca4ea90aeae2d97b
[ROCm/hip commit: 2af31479e2]
2536 regels
95 KiB
C++
2536 regels
95 KiB
C++
/*
|
|
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
#include <hc_am.hpp>
|
|
#include "hsa/hsa.h"
|
|
#include "hsa/hsa_ext_amd.h"
|
|
|
|
#include "hip/hip_runtime.h"
|
|
#include "hip_hcc_internal.h"
|
|
#include "trace_helper.h"
|
|
|
|
#include <functional>
|
|
#include <fstream>
|
|
|
|
__device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
|
|
__device__ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
|
|
|
|
// Internal HIP APIS:
|
|
namespace hip_internal {
|
|
|
|
namespace {
|
|
inline
|
|
const char* hsa_to_string(hsa_status_t err) noexcept
|
|
{
|
|
const char* r{};
|
|
|
|
if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
|
|
|
|
return "Unknown.";
|
|
}
|
|
|
|
template<std::size_t m, std::size_t n>
|
|
inline
|
|
void throwing_result_check(hsa_status_t res, const char (&file)[m],
|
|
const char (&function)[n], int line) {
|
|
if (res == HSA_STATUS_SUCCESS) return;
|
|
if (res == HSA_STATUS_INFO_BREAK) return;
|
|
|
|
throw std::runtime_error{"Failed in file " + (file +
|
|
(", in function \"" + (function +
|
|
("\", on line " + std::to_string(line))))) +
|
|
", with error: " + hsa_to_string(res)};
|
|
}
|
|
|
|
inline
|
|
hsa_agent_t cpu_agent() {
|
|
hsa_agent_t r{};
|
|
throwing_result_check(hsa_iterate_agents([](hsa_agent_t x, void* pr) {
|
|
hsa_device_type_t t{};
|
|
hsa_agent_get_info(x, HSA_AGENT_INFO_DEVICE, &t);
|
|
|
|
if (t != HSA_DEVICE_TYPE_CPU) return HSA_STATUS_SUCCESS;
|
|
|
|
*static_cast<hsa_agent_t *>(pr) = x;
|
|
|
|
return HSA_STATUS_INFO_BREAK;
|
|
}, &r), __FILE__, __func__, __LINE__);
|
|
|
|
return r;
|
|
}
|
|
|
|
inline
|
|
hsa_device_type_t type(hsa_agent_t x)
|
|
{
|
|
hsa_device_type_t r{};
|
|
throwing_result_check(hsa_agent_get_info(x, HSA_AGENT_INFO_DEVICE, &r),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
return r;
|
|
}
|
|
|
|
const auto is_large_BAR{[](){
|
|
std::unique_ptr<void, void (*)(void*)> hsa{
|
|
(hsa_init() == HSA_STATUS_SUCCESS)
|
|
? reinterpret_cast<void*>(UINT64_MAX) : nullptr,
|
|
[](void* p) { if (p) hsa_shut_down(); }};
|
|
|
|
if (!hsa) return false;
|
|
|
|
bool r{true};
|
|
|
|
throwing_result_check(hsa_iterate_agents([](hsa_agent_t x, void* pr) {
|
|
if (x.handle == cpu_agent().handle) return HSA_STATUS_SUCCESS;
|
|
|
|
throwing_result_check(
|
|
hsa_agent_iterate_regions(x, [](hsa_region_t y, void* p) {
|
|
hsa_region_segment_t seg{};
|
|
throwing_result_check(
|
|
hsa_region_get_info(y, HSA_REGION_INFO_SEGMENT, &seg),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
if (seg != HSA_REGION_SEGMENT_GLOBAL) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
uint32_t flags{};
|
|
throwing_result_check(hsa_region_get_info(
|
|
y, HSA_REGION_INFO_GLOBAL_FLAGS, &flags),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
|
|
hsa_amd_memory_pool_access_t tmp{};
|
|
throwing_result_check(
|
|
hsa_amd_agent_memory_pool_get_info(
|
|
cpu_agent(),
|
|
hsa_amd_memory_pool_t{y.handle},
|
|
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
|
&tmp),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
*static_cast<bool*>(p) &=
|
|
tmp != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
|
}
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}, pr), __FILE__, __func__, __LINE__);
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}, &r), __FILE__, __func__, __LINE__);
|
|
|
|
return r;
|
|
}()};
|
|
|
|
constexpr std::uint32_t is_cpu_owned{UINT32_MAX};
|
|
|
|
inline
|
|
hsa_amd_pointer_info_t info(const void* p)
|
|
{
|
|
hsa_amd_pointer_info_t r{sizeof(hsa_amd_pointer_info_t)};
|
|
throwing_result_check(
|
|
hsa_amd_pointer_info(
|
|
const_cast<void*>(p), &r, nullptr, nullptr, nullptr),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
if (type(r.agentOwner) == HSA_DEVICE_TYPE_CPU) r.size = is_cpu_owned;
|
|
|
|
return r;
|
|
}
|
|
|
|
constexpr size_t staging_sz{4 * 1024 * 1024}; // 2 Pages.
|
|
constexpr size_t max_h2d_std_memcpy_sz{8 * 1024}; // 8 KiB.
|
|
constexpr size_t max_d2h_std_memcpy_sz{64}; // 1 cacheline.
|
|
|
|
thread_local const std::unique_ptr<void, void (*)(void *)> staging_buffer{
|
|
[]() {
|
|
hsa_region_t r{};
|
|
throwing_result_check(hsa_agent_iterate_regions(
|
|
cpu_agent(), [](hsa_region_t x, void *p) {
|
|
hsa_region_segment_t seg{};
|
|
throwing_result_check(
|
|
hsa_region_get_info(x, HSA_REGION_INFO_SEGMENT, &seg),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
if (seg != HSA_REGION_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
|
|
|
|
uint32_t flags{};
|
|
throwing_result_check(hsa_region_get_info(
|
|
x, HSA_REGION_INFO_GLOBAL_FLAGS, &flags),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
|
|
*static_cast<hsa_region_t *>(p) = x;
|
|
|
|
return HSA_STATUS_INFO_BREAK;
|
|
}
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}, &r), __FILE__, __func__, __LINE__);
|
|
|
|
void *tp{};
|
|
throwing_result_check(hsa_memory_allocate(r, staging_sz, &tp),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
return tp;
|
|
}(),
|
|
[](void *ptr) { hsa_memory_free(ptr); }};
|
|
|
|
thread_local hsa_signal_t copy_signal{[]() {
|
|
hsa_agent_t cpu{cpu_agent()};
|
|
hsa_signal_t sgn{};
|
|
throwing_result_check(hsa_signal_create(1, 1, &cpu, &sgn),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
return sgn;
|
|
}()};
|
|
} // Unnamed namespace.
|
|
|
|
inline
|
|
void do_copy(void* __restrict dst, const void* __restrict src, size_t n,
|
|
hsa_agent_t da, hsa_agent_t sa) {
|
|
hsa_signal_silent_store_relaxed(copy_signal, 1);
|
|
throwing_result_check(
|
|
hsa_amd_memory_async_copy(dst, da, src, sa, n, 0, nullptr, copy_signal),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
while (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_EQ, 0,
|
|
UINT64_MAX, HSA_WAIT_STATE_ACTIVE));
|
|
}
|
|
|
|
inline
|
|
void do_std_memcpy(
|
|
void* __restrict dst, const void* __restrict src, std::size_t n) {
|
|
std::memcpy(dst, src, n);
|
|
|
|
return std::atomic_thread_fence(std::memory_order_seq_cst);
|
|
}
|
|
|
|
inline
|
|
void d2h_copy(void* __restrict dst, const void* __restrict src, size_t n,
|
|
hsa_amd_pointer_info_t si) {
|
|
const auto di{info(dst)};
|
|
const auto is_locked{di.type == HSA_EXT_POINTER_TYPE_LOCKED};
|
|
|
|
if (!is_locked && si.size == is_cpu_owned) {
|
|
return do_std_memcpy(dst, src, n);
|
|
}
|
|
if (!is_locked && is_large_BAR && n <= max_d2h_std_memcpy_sz) {
|
|
return do_std_memcpy(dst, src, n);
|
|
}
|
|
if (di.type == HSA_EXT_POINTER_TYPE_HSA) {
|
|
return do_copy(dst, src, n, si.agentOwner, si.agentOwner);
|
|
}
|
|
|
|
if (is_locked) {
|
|
dst = static_cast<char*>(di.agentBaseAddress) +
|
|
(static_cast<char*>(dst) -
|
|
static_cast<char*>(di.hostBaseAddress));
|
|
do_copy(dst, src, n, si.agentOwner, si.agentOwner);
|
|
}
|
|
else if (n <= staging_sz) {
|
|
do_copy(staging_buffer.get(), src, n, si.agentOwner, si.agentOwner);
|
|
std::memcpy(dst, staging_buffer.get(), n);
|
|
}
|
|
else {
|
|
std::unique_ptr<void, void (*)(void*)> lck{
|
|
dst, [](void* p) { hsa_amd_memory_unlock(p); }};
|
|
|
|
throwing_result_check(hsa_amd_memory_lock(dst, n, &si.agentOwner, 1,
|
|
const_cast<void**>(&dst)),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
do_copy(dst, src, n, si.agentOwner, si.agentOwner);
|
|
}
|
|
}
|
|
|
|
inline
|
|
void h2d_copy(void* __restrict dst, const void* __restrict src, size_t n,
|
|
hsa_amd_pointer_info_t di) {
|
|
const auto si{info(const_cast<void*>(src))};
|
|
const auto is_locked{si.type == HSA_EXT_POINTER_TYPE_LOCKED};
|
|
|
|
if (!is_locked && di.size == is_cpu_owned) {
|
|
return do_std_memcpy(dst, src, n);
|
|
}
|
|
if (!is_locked && is_large_BAR && n <= max_h2d_std_memcpy_sz) {
|
|
return do_std_memcpy(dst, src, n);
|
|
}
|
|
if (si.type == HSA_EXT_POINTER_TYPE_HSA) {
|
|
return do_copy(dst, src, n, di.agentOwner, di.agentOwner);
|
|
}
|
|
|
|
if (is_locked) {
|
|
src = static_cast<char*>(si.agentBaseAddress) +
|
|
(static_cast<const char*>(src) -
|
|
static_cast<char*>(si.hostBaseAddress));
|
|
do_copy(dst, src, n, di.agentOwner, di.agentOwner);
|
|
}
|
|
else if (n <= staging_sz) {
|
|
std::memcpy(staging_buffer.get(), src, n);
|
|
do_copy(dst, staging_buffer.get(), n, di.agentOwner, di.agentOwner);
|
|
}
|
|
else {
|
|
std::unique_ptr<void, void (*)(void*)> lck{
|
|
const_cast<void*>(src), [](void* p) { hsa_amd_memory_unlock(p); }};
|
|
|
|
throwing_result_check(hsa_amd_memory_lock(const_cast<void*>(src), n,
|
|
&di.agentOwner, 1,
|
|
const_cast<void**>(&src)),
|
|
__FILE__, __func__, __LINE__);
|
|
|
|
do_copy(dst, src, n, di.agentOwner, di.agentOwner);
|
|
}
|
|
}
|
|
|
|
inline
|
|
void generic_copy(void* __restrict dst, const void* __restrict src, size_t n,
|
|
hsa_amd_pointer_info_t di, hsa_amd_pointer_info_t si) {
|
|
if (di.size == is_cpu_owned && si.size == is_cpu_owned) {
|
|
return do_std_memcpy(dst, src, n);
|
|
}
|
|
if (di.size == is_cpu_owned) return d2h_copy(dst, src, n, si);
|
|
if (si.size == is_cpu_owned) return h2d_copy(dst, src, n, di);
|
|
|
|
hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner,
|
|
nullptr, di.agentBaseAddress);
|
|
if (res == HSA_STATUS_SUCCESS){
|
|
return do_copy(dst, src, n, di.agentOwner, si.agentOwner);
|
|
}
|
|
// If devices do not have access then fallback mechanism will be used
|
|
// copy will be slower
|
|
throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__);
|
|
}
|
|
|
|
inline
|
|
void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n,
|
|
hipMemcpyKind k) {
|
|
auto si{info(src)};
|
|
auto di{info(dst)};
|
|
|
|
if (!is_large_BAR){
|
|
// Pointer info takes presidence over hipMemcpyKind
|
|
// if there is mismatch b/w Memcpy kind and dst/src pointer
|
|
// E.g. dst(host pointer),src(device pointer) and hipMemcpyKind set as hipMemcpyHostToDevice
|
|
if (di.size == is_cpu_owned && si.size == is_cpu_owned)
|
|
k = hipMemcpyHostToHost;
|
|
else if (si.size == is_cpu_owned && di.size != is_cpu_owned)
|
|
k = hipMemcpyHostToDevice;
|
|
else if (di.size == is_cpu_owned && si.size != is_cpu_owned)
|
|
k = hipMemcpyDeviceToHost;
|
|
else
|
|
k = hipMemcpyDeviceToDevice;
|
|
}
|
|
switch (k) {
|
|
case hipMemcpyHostToHost: std::memcpy(dst, src, n); break;
|
|
case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, di);
|
|
case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, si);
|
|
case hipMemcpyDeviceToDevice: {
|
|
hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner,
|
|
nullptr, di.agentBaseAddress);
|
|
if (res == HSA_STATUS_SUCCESS){
|
|
return do_copy(dst, src, n, di.agentOwner, si.agentOwner);
|
|
}
|
|
|
|
// If devices do not have access then fallback mechanism will be used
|
|
// copy will be slower
|
|
throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__);
|
|
break;
|
|
}
|
|
default: return generic_copy(dst, src, n, di, si);
|
|
}
|
|
}
|
|
|
|
hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes,
|
|
hipMemcpyKind kind, hipStream_t stream) {
|
|
if (sizeBytes == 0) return hipSuccess;
|
|
if (!dst || !src) return hipErrorInvalidValue;
|
|
|
|
try {
|
|
stream = ihipSyncAndResolveStream(stream);
|
|
|
|
if (!stream) return hipErrorInvalidValue;
|
|
|
|
stream->locked_copyAsync(dst, src, sizeBytes, kind);
|
|
}
|
|
catch (const ihipException& ex) {
|
|
return ex._code;
|
|
}
|
|
catch (const std::exception& ex) {
|
|
std::cerr << ex.what() << std::endl;
|
|
throw;
|
|
}
|
|
catch (...) {
|
|
return hipErrorUnknown;
|
|
}
|
|
|
|
return hipSuccess;
|
|
}
|
|
|
|
hipError_t memcpySync(void* dst, const void* src, size_t sizeBytes,
|
|
hipMemcpyKind kind, hipStream_t stream) {
|
|
if (sizeBytes == 0) return hipSuccess;
|
|
if (!dst || !src) return hipErrorInvalidValue;
|
|
|
|
try {
|
|
stream = ihipSyncAndResolveStream(stream);
|
|
|
|
if (!stream) return hipErrorInvalidValue;
|
|
|
|
LockedAccessor_StreamCrit_t cs{stream->criticalData()};
|
|
cs->_av.wait();
|
|
|
|
memcpy_impl(dst, src, sizeBytes, kind);
|
|
cs->_last_op_was_a_copy = true;
|
|
}
|
|
catch (const ihipException& ex) {
|
|
return ex._code;
|
|
}
|
|
catch (const std::exception& ex) {
|
|
std::cerr << ex.what() << std::endl;
|
|
throw;
|
|
}
|
|
catch (...) {
|
|
return hipErrorUnknown;
|
|
}
|
|
|
|
return hipSuccess;
|
|
}
|
|
|
|
// return 0 on success or -1 on error:
|
|
int sharePtr(void* ptr, ihipCtx_t* ctx, bool shareWithAll, unsigned hipFlags) {
|
|
int ret = 0;
|
|
|
|
auto device = ctx->getWriteableDevice();
|
|
|
|
if (shareWithAll) {
|
|
// shareWithAll memory is not mapped to any device
|
|
hc::am_memtracker_update(ptr, -1, hipFlags);
|
|
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt + 1, g_allAgents, NULL, ptr);
|
|
tprintf(DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
|
|
if (s != HSA_STATUS_SUCCESS) {
|
|
ret = -1;
|
|
}
|
|
} else {
|
|
#if USE_APP_PTR_FOR_CTX
|
|
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
|
|
#else
|
|
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
|
|
#endif
|
|
int peerCnt = 0;
|
|
{
|
|
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
|
// the peerCnt always stores self so make sure the trace actually
|
|
peerCnt = crit->peerCnt();
|
|
tprintf(DB_MEM, " allow access to %d other peer(s)\n", peerCnt - 1);
|
|
if (peerCnt > 1) {
|
|
// printf ("peer self access\n");
|
|
|
|
// TODOD - remove me:
|
|
for (auto iter = crit->_peers.begin(); iter != crit->_peers.end(); iter++) {
|
|
tprintf(DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(),
|
|
(iter == crit->_peers.begin()) ? " (self)" : "");
|
|
};
|
|
|
|
hsa_status_t s =
|
|
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, ptr);
|
|
if (s != HSA_STATUS_SUCCESS) {
|
|
ret = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
// Allocate a new pointer with am_alloc and share with all valid peers.
|
|
// Returns null-ptr if a memory error occurs (either allocation or sharing)
|
|
void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool shareWithAll,
|
|
unsigned amFlags, unsigned hipFlags, size_t alignment) {
|
|
void* ptr = nullptr;
|
|
|
|
auto device = ctx->getWriteableDevice();
|
|
|
|
#if (__hcc_workweek__ >= 17332)
|
|
if (alignment != 0) {
|
|
ptr = hc::am_aligned_alloc(sizeBytes, device->_acc, amFlags, alignment);
|
|
} else
|
|
#endif
|
|
{
|
|
ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags);
|
|
}
|
|
tprintf(DB_MEM, " alloc %s ptr:%p-%p size:%zu on dev:%d\n", msg, ptr,
|
|
static_cast<char*>(ptr) + sizeBytes, sizeBytes, device->_deviceId);
|
|
|
|
if (HIP_INIT_ALLOC != -1) {
|
|
// TODO , dont' call HIP API directly here:
|
|
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
|
|
}
|
|
|
|
if (ptr != nullptr) {
|
|
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
|
|
if (r != 0) {
|
|
ptr = nullptr;
|
|
}
|
|
}
|
|
|
|
return ptr;
|
|
}
|
|
|
|
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
if (sizeBytes == 0) {
|
|
return hipSuccess;
|
|
}
|
|
|
|
if (HIP_SYNC_HOST_ALLOC) {
|
|
hipDeviceSynchronize();
|
|
}
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
if ((ctx == nullptr) || (ptr == nullptr)) {
|
|
hip_status = hipErrorInvalidValue;
|
|
} else {
|
|
unsigned trueFlags = flags;
|
|
if (flags == hipHostMallocDefault) {
|
|
// HCC/ROCM provide a modern system with unified memory and should set both of these
|
|
// flags by default:
|
|
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
|
|
}
|
|
|
|
|
|
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped |
|
|
hipHostMallocWriteCombined | hipHostMallocCoherent |
|
|
hipHostMallocNonCoherent;
|
|
|
|
|
|
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
|
|
|
|
if ((flags & ~supportedFlags) || ((flags & coherencyFlags) == coherencyFlags)) {
|
|
*ptr = nullptr;
|
|
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
|
|
hip_status = hipErrorInvalidValue;
|
|
} else {
|
|
auto device = ctx->getWriteableDevice();
|
|
#if (__hcc_workweek__ >= 19115)
|
|
//Avoid mapping host pinned memory to all devices by HCC
|
|
unsigned amFlags = amHostUnmapped;
|
|
#else
|
|
unsigned amFlags = 0;
|
|
#endif
|
|
if (flags & hipHostMallocCoherent) {
|
|
amFlags |= amHostCoherent;
|
|
} else if (flags & hipHostMallocNonCoherent) {
|
|
amFlags |= amHostNonCoherent;
|
|
} else {
|
|
// depends on env variables:
|
|
amFlags |= HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent;
|
|
}
|
|
|
|
|
|
*ptr = hip_internal::allocAndSharePtr(
|
|
(amFlags & amHostCoherent) ? "finegrained_host" : "pinned_host", sizeBytes, ctx,
|
|
true /*shareWithAll*/, amFlags, flags, 0);
|
|
|
|
if (sizeBytes && (*ptr == NULL)) {
|
|
hip_status = hipErrorOutOfMemory;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (HIP_SYNC_HOST_ALLOC) {
|
|
hipDeviceSynchronize();
|
|
}
|
|
return hip_status;
|
|
}
|
|
|
|
hipError_t ihipHostFree(TlsData *tls, void* ptr) {
|
|
|
|
// Synchronize to ensure all work has finished.
|
|
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
|
// for all activity to finish.
|
|
|
|
hipError_t hipStatus = hipErrorInvalidValue;
|
|
if (ptr) {
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
|
if (status == AM_SUCCESS) {
|
|
if (amPointerInfo._hostPointer == ptr) {
|
|
hc::am_free(ptr);
|
|
hipStatus = hipSuccess;
|
|
}
|
|
}
|
|
} else {
|
|
// free NULL pointer succeeds and is common technique to initialize runtime
|
|
hipStatus = hipSuccess;
|
|
}
|
|
|
|
return hipStatus;
|
|
}
|
|
|
|
|
|
} // end namespace hip_internal
|
|
|
|
//-------------------------------------------------------------------------------------------------
|
|
//-------------------------------------------------------------------------------------------------
|
|
// Memory
|
|
//
|
|
//
|
|
//
|
|
// HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API.
|
|
//_appId : DeviceID. For device mem, this is device where the memory is physically allocated.
|
|
// For host or registered mem, this is the current device when the memory is allocated or
|
|
// registered. This device will have a GPUVM mapping for the host mem.
|
|
//
|
|
//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are
|
|
//returned to user in hipHostGetFlags and other APIs.
|
|
// TODO - add more info here when available.
|
|
//
|
|
hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
|
|
HIP_INIT_API(hipPointerGetAttributes, attributes, ptr);
|
|
|
|
hipError_t e = hipSuccess;
|
|
if ((attributes == nullptr) || (ptr == nullptr)) {
|
|
e = hipErrorInvalidValue;
|
|
} else {
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
|
if (status == AM_SUCCESS) {
|
|
attributes->memoryType =
|
|
amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice : hipMemoryTypeHost;
|
|
attributes->hostPointer = amPointerInfo._hostPointer;
|
|
attributes->devicePointer = amPointerInfo._devicePointer;
|
|
attributes->isManaged = 0;
|
|
if (attributes->memoryType == hipMemoryTypeHost) {
|
|
attributes->hostPointer = (void*)ptr;
|
|
}
|
|
if (attributes->memoryType == hipMemoryTypeDevice) {
|
|
attributes->devicePointer = (void*)ptr;
|
|
}
|
|
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
|
|
attributes->device = amPointerInfo._appId;
|
|
|
|
if (attributes->device < -1) {
|
|
e = hipErrorInvalidDevice;
|
|
}
|
|
} else {
|
|
attributes->memoryType = hipMemoryTypeDevice;
|
|
attributes->hostPointer = 0;
|
|
attributes->devicePointer = 0;
|
|
attributes->device = -2;
|
|
attributes->isManaged = 0;
|
|
attributes->allocationFlags = 0;
|
|
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
}
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
|
|
hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) {
|
|
HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
// Flags must be 0:
|
|
if ((flags != 0) || (devicePointer == nullptr) || (hostPointer == nullptr)) {
|
|
e = hipErrorInvalidValue;
|
|
} else {
|
|
hc::accelerator acc;
|
|
*devicePointer = NULL;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer);
|
|
if (status == AM_SUCCESS) {
|
|
*devicePointer =
|
|
static_cast<char*>(amPointerInfo._devicePointer) +
|
|
(static_cast<char*>(hostPointer) - static_cast<char*>(amPointerInfo._hostPointer));
|
|
tprintf(DB_MEM, " host_ptr=%p returned device_pointer=%p\n", hostPointer,
|
|
*devicePointer);
|
|
} else {
|
|
e = hipErrorOutOfMemory;
|
|
}
|
|
}
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
|
|
hipError_t hipMalloc(void** ptr, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMalloc, (TRACE_MEM), ptr, sizeBytes);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
if (sizeBytes == 0) {
|
|
if (ptr) *ptr = NULL;
|
|
return ihipLogStatus(hipSuccess);
|
|
}
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
// return NULL pointer when malloc size is 0
|
|
if ( nullptr == ctx || nullptr == ptr) {
|
|
hip_status = hipErrorInvalidValue;
|
|
} else {
|
|
auto device = ctx->getWriteableDevice();
|
|
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/,
|
|
0 /*amFlags*/, 0 /*hipFlags*/, 0);
|
|
|
|
if (sizeBytes && (*ptr == NULL)) {
|
|
hip_status = hipErrorOutOfMemory;
|
|
}
|
|
}
|
|
|
|
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) {
|
|
HIP_INIT_SPECIAL_API(hipExtMallocWithFlags, (TRACE_MEM), ptr, sizeBytes, flags);
|
|
HIP_SET_DEVICE();
|
|
|
|
#if (__hcc_workweek__ >= 19115)
|
|
if (sizeBytes == 0) {
|
|
if (ptr) *ptr = NULL;
|
|
return ihipLogStatus(hipSuccess);
|
|
}
|
|
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
if ((ctx == nullptr) || (ptr == nullptr)) {
|
|
hip_status = hipErrorInvalidValue;
|
|
} else {
|
|
unsigned amFlags = 0;
|
|
if (flags & hipDeviceMallocFinegrained) {
|
|
amFlags = amDeviceFinegrained;
|
|
} else if (flags != hipDeviceMallocDefault) {
|
|
hip_status = hipErrorInvalidValue;
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
auto device = ctx->getWriteableDevice();
|
|
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/,
|
|
amFlags /*amFlags*/, 0 /*hipFlags*/, 0);
|
|
|
|
if (sizeBytes && (*ptr == NULL)) {
|
|
hip_status = hipErrorOutOfMemory;
|
|
}
|
|
}
|
|
#else
|
|
hipError_t hip_status = hipErrorOutOfMemory;
|
|
#endif
|
|
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
|
|
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
|
HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags);
|
|
HIP_SET_DEVICE();
|
|
if (sizeBytes == 0) {
|
|
return ihipLogStatus(hipSuccess);
|
|
}
|
|
hipError_t hip_status = hipSuccess;
|
|
hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags);
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
|
|
HIP_INIT_SPECIAL_API(hipMallocManaged, (TRACE_MEM), devPtr, size, flags);
|
|
HIP_SET_DEVICE();
|
|
if (size == 0) {
|
|
return ihipLogStatus(hipSuccess);
|
|
}
|
|
hipError_t hip_status = hipSuccess;
|
|
if(flags != hipMemAttachGlobal)
|
|
hip_status = hipErrorInvalidValue;
|
|
else
|
|
hip_status = hip_internal::ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
// Deprecated function:
|
|
hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
|
|
|
|
// Deprecated function:
|
|
hipError_t hipMemAllocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
|
|
|
|
// Deprecated function:
|
|
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
|
return hipHostMalloc(ptr, sizeBytes, flags);
|
|
};
|
|
|
|
hipError_t allocImage(TlsData* tls,hsa_ext_image_geometry_t geometry, int width, int height, int depth, hsa_ext_image_channel_order_t channelOrder, hsa_ext_image_channel_type_t channelType,void ** ptr, hsa_ext_image_data_info_t &imageInfo, int array_size __dparm(0)) {
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
if (ctx) {
|
|
hc::accelerator acc = ctx->getDevice()->_acc;
|
|
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
|
if (!agent)
|
|
return hipErrorInvalidHandle;
|
|
size_t allocGranularity = 0;
|
|
hsa_amd_memory_pool_t* allocRegion = static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
|
|
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
|
|
|
|
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(width, IMAGE_PITCH_ALIGNMENT);
|
|
if(HSA_EXT_IMAGE_GEOMETRY_2DA == geometry)
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (array_size == 0 ? 1 : array_size) ;
|
|
else
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (depth == 0 ? 1 : depth) ;
|
|
|
|
imageInfo.alignment = IMAGE_PITCH_ALIGNMENT;
|
|
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
|
|
const unsigned am_flags = 0;
|
|
*ptr = hip_internal::allocAndSharePtr("device_array", imageInfo.size, ctx,
|
|
false /*shareWithAll*/, am_flags, 0, alignment);
|
|
if (*ptr == NULL) {
|
|
return hipErrorOutOfMemory;
|
|
}
|
|
return hipSuccess;
|
|
}
|
|
else {
|
|
return hipErrorOutOfMemory;
|
|
}
|
|
}
|
|
|
|
// width in bytes
|
|
hipError_t ihipMallocPitch(TlsData* tls, void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
|
|
hipError_t hip_status = hipSuccess;
|
|
if(ptr==NULL || pitch == NULL){
|
|
return hipErrorInvalidValue;
|
|
}
|
|
hsa_ext_image_data_info_t imageInfo;
|
|
if (depth == 0)
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_2D,width,height,0,HSA_EXT_IMAGE_CHANNEL_ORDER_R,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,ptr,imageInfo);
|
|
else
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_3D,width,height,depth,HSA_EXT_IMAGE_CHANNEL_ORDER_R,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,ptr,imageInfo);
|
|
|
|
if(hip_status == hipSuccess)
|
|
*pitch = imageInfo.size/(height == 0 ? 1 : height)/(depth == 0 ? 1 : depth);
|
|
|
|
return hip_status;
|
|
}
|
|
|
|
// width in bytes
|
|
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
|
|
HIP_INIT_SPECIAL_API(hipMallocPitch, (TRACE_MEM), ptr, pitch, width, height);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
if (width == 0 || height == 0) return ihipLogStatus(hipErrorUnknown);
|
|
|
|
hip_status = ihipMallocPitch(tls, ptr, pitch, width, height, 0);
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes){
|
|
HIP_INIT_SPECIAL_API(hipMemAllocPitch, (TRACE_MEM), dptr, pitch, widthInBytes, height,elementSizeBytes);
|
|
HIP_SET_DEVICE();
|
|
|
|
if (widthInBytes == 0 || height == 0) return ihipLogStatus(hipErrorInvalidValue);
|
|
|
|
return ihipLogStatus(ihipMallocPitch(tls, dptr, pitch, widthInBytes, height, 0));
|
|
}
|
|
|
|
hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
|
|
HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
if (extent.width == 0 || extent.height == 0) return ihipLogStatus(hipErrorUnknown);
|
|
if (!pitchedDevPtr) return ihipLogStatus(hipErrorInvalidValue);
|
|
void* ptr;
|
|
size_t pitch;
|
|
|
|
hip_status =
|
|
ihipMallocPitch(tls, &pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
|
|
if (hip_status == hipSuccess) {
|
|
pitchedDevPtr->pitch = pitch;
|
|
pitchedDevPtr->xsize = extent.width;
|
|
pitchedDevPtr->ysize = extent.height;
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
|
|
hipChannelFormatDesc cd;
|
|
cd.x = x;
|
|
cd.y = y;
|
|
cd.z = z;
|
|
cd.w = w;
|
|
cd.f = f;
|
|
return cd;
|
|
}
|
|
|
|
extern void getChannelOrderAndType(const hipChannelFormatDesc& desc,
|
|
enum hipTextureReadMode readMode,
|
|
hsa_ext_image_channel_order_t* channelOrder,
|
|
hsa_ext_image_channel_type_t* channelType);
|
|
|
|
hipError_t GetImageInfo(hsa_ext_image_geometry_t geometry,int width, int height, int depth, hipChannelFormatDesc desc, hsa_ext_image_data_info_t &imageInfo,int array_size __dparm(0))
|
|
{
|
|
hsa_ext_image_channel_order_t channelOrder;
|
|
hsa_ext_image_channel_type_t channelType;
|
|
getChannelOrderAndType(desc, hipReadModeElementType, &channelOrder, &channelType);
|
|
|
|
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(width, IMAGE_PITCH_ALIGNMENT);
|
|
if(HSA_EXT_IMAGE_GEOMETRY_2DA == geometry)
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (array_size == 0 ? 1 : array_size);
|
|
else
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (depth == 0 ? 1 : depth);
|
|
imageInfo.alignment = IMAGE_PITCH_ALIGNMENT;
|
|
return hipSuccess;
|
|
}
|
|
|
|
hipError_t GetImageInfo(hsa_ext_image_geometry_t geometry,size_t width, size_t height, size_t depth, hsa_ext_image_channel_order_t channelOrder, hsa_ext_image_channel_type_t channelType, hsa_ext_image_data_info_t &imageInfo,size_t array_size __dparm(0))
|
|
{
|
|
|
|
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(width, IMAGE_PITCH_ALIGNMENT);
|
|
|
|
if(HSA_EXT_IMAGE_GEOMETRY_2DA == geometry)
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (array_size == 0 ? 1 : array_size);
|
|
else
|
|
imageInfo.size = rowPitch * (height == 0 ? 1 : height) * (depth == 0 ? 1 : depth);
|
|
|
|
imageInfo.alignment = IMAGE_PITCH_ALIGNMENT;
|
|
|
|
return hipSuccess;
|
|
}
|
|
|
|
hipError_t ihipArrayToImageFormat(hipArray_Format format,hsa_ext_image_channel_type_t &channelType) {
|
|
switch (format) {
|
|
case HIP_AD_FORMAT_UNSIGNED_INT8:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
|
|
break;
|
|
case HIP_AD_FORMAT_UNSIGNED_INT16:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
|
|
break;
|
|
case HIP_AD_FORMAT_UNSIGNED_INT32:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
|
break;
|
|
case HIP_AD_FORMAT_SIGNED_INT8:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
|
|
break;
|
|
case HIP_AD_FORMAT_SIGNED_INT16:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
|
|
break;
|
|
case HIP_AD_FORMAT_SIGNED_INT32:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
|
break;
|
|
case HIP_AD_FORMAT_HALF:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
|
|
break;
|
|
case HIP_AD_FORMAT_FLOAT:
|
|
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
|
break;
|
|
default:
|
|
return hipErrorUnknown;
|
|
break;
|
|
}
|
|
return hipSuccess;
|
|
}
|
|
|
|
hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) {
|
|
HIP_INIT_SPECIAL_API(hipArrayCreate, (TRACE_MEM), array, pAllocateArray);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
if (pAllocateArray->Width > 0) {
|
|
*array = (hipArray*)malloc(sizeof(hipArray));
|
|
array[0]->width = pAllocateArray->Width;
|
|
array[0]->height = pAllocateArray->Height;
|
|
array[0]->Format = pAllocateArray->Format;
|
|
array[0]->NumChannels = pAllocateArray->NumChannels;
|
|
array[0]->isDrv = true;
|
|
array[0]->textureType = hipTextureType2D;
|
|
void** ptr = &array[0]->data;
|
|
hsa_ext_image_channel_type_t channelType;
|
|
hsa_ext_image_channel_order_t channelOrder;
|
|
|
|
hip_status = ihipArrayToImageFormat(pAllocateArray->Format,channelType);
|
|
if(hipSuccess != hip_status)
|
|
return ihipLogStatus(hip_status);
|
|
|
|
if (pAllocateArray->NumChannels == 4) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
|
|
} else if (pAllocateArray->NumChannels == 2) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
|
|
} else if (pAllocateArray->NumChannels == 1) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
|
}
|
|
hsa_ext_image_data_info_t imageInfo;
|
|
return ihipLogStatus(allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_2D,pAllocateArray->Width,
|
|
pAllocateArray->Height,0,channelOrder,channelType,ptr,imageInfo));
|
|
} else {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
}
|
|
|
|
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width,
|
|
size_t height, unsigned int flags) {
|
|
HIP_INIT_SPECIAL_API(hipMallocArray, (TRACE_MEM), array, desc, width, height, flags);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
if (width > 0) {
|
|
*array = (hipArray*)malloc(sizeof(hipArray));
|
|
array[0]->type = flags;
|
|
array[0]->width = width;
|
|
array[0]->height = height;
|
|
array[0]->depth = 1;
|
|
array[0]->desc = *desc;
|
|
array[0]->isDrv = false;
|
|
array[0]->textureType = hipTextureType2D;
|
|
void** ptr = &array[0]->data;
|
|
|
|
hsa_ext_image_channel_order_t channelOrder;
|
|
hsa_ext_image_channel_type_t channelType;
|
|
getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
|
|
hsa_ext_image_data_info_t imageInfo;
|
|
switch (flags) {
|
|
case hipArrayLayered:
|
|
case hipArrayCubemap:
|
|
case hipArraySurfaceLoadStore:
|
|
case hipArrayTextureGather:
|
|
assert(0);
|
|
break;
|
|
case hipArrayDefault:
|
|
default:
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_2D,width,height,0,channelOrder,channelType,ptr,imageInfo);
|
|
break;
|
|
}
|
|
} else {
|
|
hip_status = hipErrorInvalidValue;
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) {
|
|
HIP_INIT_SPECIAL_API(hipArray3DCreate, (TRACE_MEM), array, pAllocateArray);
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
*array = (hipArray*)malloc(sizeof(hipArray));
|
|
array[0]->type = pAllocateArray->Flags;
|
|
array[0]->width = pAllocateArray->Width;
|
|
array[0]->height = pAllocateArray->Height;
|
|
array[0]->depth = pAllocateArray->Depth;
|
|
array[0]->Format = pAllocateArray->Format;
|
|
array[0]->NumChannels = pAllocateArray->NumChannels;
|
|
array[0]->isDrv = true;
|
|
void** ptr = &array[0]->data;
|
|
|
|
hsa_ext_image_channel_order_t channelOrder;
|
|
if (pAllocateArray->NumChannels == 4) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
|
|
} else if (pAllocateArray->NumChannels == 2) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
|
|
} else if (pAllocateArray->NumChannels == 1) {
|
|
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
|
}
|
|
|
|
hsa_ext_image_channel_type_t channelType;
|
|
hip_status = ihipArrayToImageFormat(pAllocateArray->Format,channelType);
|
|
hsa_ext_image_data_info_t imageInfo;
|
|
switch (pAllocateArray->Flags) {
|
|
case hipArrayLayered:
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_2DA,pAllocateArray->Width,pAllocateArray->Height,0,
|
|
channelOrder,channelType,ptr,imageInfo,pAllocateArray->Depth);
|
|
array[0]->textureType = hipTextureType2DLayered;
|
|
break;
|
|
case hipArraySurfaceLoadStore:
|
|
case hipArrayTextureGather:
|
|
assert(0);
|
|
break;
|
|
case hipArrayDefault:
|
|
case hipArrayCubemap:
|
|
default:
|
|
array[0]->type = hipArrayCubemap;
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_3D,pAllocateArray->Width,pAllocateArray->Height,
|
|
pAllocateArray->Depth,channelOrder,channelType,ptr,imageInfo);
|
|
array[0]->textureType = hipTextureType3D;
|
|
break;
|
|
}
|
|
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc,
|
|
struct hipExtent extent, unsigned int flags) {
|
|
|
|
HIP_INIT_API(hipMalloc3DArray, array, desc, &extent, flags);
|
|
HIP_SET_DEVICE();
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
if(array==NULL ){
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
*array = (hipArray*)malloc(sizeof(hipArray));
|
|
array[0]->type = flags;
|
|
array[0]->width = extent.width;
|
|
array[0]->height = extent.height;
|
|
array[0]->depth = extent.depth;
|
|
array[0]->desc = *desc;
|
|
array[0]->isDrv = false;
|
|
void** ptr = &array[0]->data;
|
|
hsa_ext_image_channel_order_t channelOrder;
|
|
hsa_ext_image_channel_type_t channelType;
|
|
getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
|
|
hsa_ext_image_data_info_t imageInfo;
|
|
switch (flags) {
|
|
case hipArrayLayered:
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_2DA,extent.width,extent.height,0,channelOrder,channelType,ptr,imageInfo,extent.depth);
|
|
array[0]->textureType = hipTextureType2DLayered;
|
|
break;
|
|
case hipArraySurfaceLoadStore:
|
|
case hipArrayTextureGather:
|
|
assert(0);
|
|
break;
|
|
case hipArrayDefault:
|
|
case hipArrayCubemap:
|
|
default:
|
|
array[0]->type = hipArrayCubemap;
|
|
hip_status = allocImage(tls,HSA_EXT_IMAGE_GEOMETRY_3D,extent.width,extent.height,extent.depth,channelOrder,channelType,ptr,imageInfo);
|
|
array[0]->textureType = hipTextureType3D;
|
|
break;
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
|
|
HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr);
|
|
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
|
if (status == AM_SUCCESS) {
|
|
*flagsPtr = amPointerInfo._appAllocationFlags;
|
|
//0 is valid flag hipHostMallocDefault, and during hipHostMalloc if unsupported flags are passed as parameter it throws error
|
|
hip_status = hipSuccess;
|
|
tprintf(DB_MEM, " %s: host ptr=%p\n", __func__, hostPtr);
|
|
} else {
|
|
hip_status = hipErrorInvalidValue;
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
|
|
// TODO - need to fix several issues here related to P2P access, host memory fallback.
|
|
hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) {
|
|
HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags);
|
|
|
|
hipError_t hip_status = hipSuccess;
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
if (hostPtr == NULL) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
|
|
|
if (am_status == AM_SUCCESS) {
|
|
hip_status = hipErrorHostMemoryAlreadyRegistered;
|
|
} else {
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
if (hostPtr == NULL) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
// TODO-test : multi-gpu access to registered host memory.
|
|
if (ctx) {
|
|
if ((flags == hipHostRegisterDefault) || (flags & hipHostRegisterPortable) ||
|
|
(flags & hipHostRegisterMapped) || (flags == hipExtHostRegisterCoarseGrained)) {
|
|
auto device = ctx->getWriteableDevice();
|
|
std::vector<hc::accelerator> vecAcc;
|
|
for (int i = 0; i < g_deviceCnt; i++) {
|
|
vecAcc.push_back(ihipGetDevice(i)->_acc);
|
|
}
|
|
#if (__hcc_workweek__ >= 19183)
|
|
if(flags & hipExtHostRegisterCoarseGrained) {
|
|
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
|
|
vecAcc.size());
|
|
} else {
|
|
am_status = hc::am_memory_host_lock_with_flag(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
|
|
vecAcc.size());
|
|
}
|
|
#else
|
|
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
|
|
vecAcc.size());
|
|
#endif
|
|
if ( am_status == AM_SUCCESS ) {
|
|
am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
|
|
|
if ( am_status == AM_SUCCESS ) {
|
|
void *devPtr = amPointerInfo._devicePointer;
|
|
#if USE_APP_PTR_FOR_CTX
|
|
hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx);
|
|
hc::am_memtracker_update(devPtr, device->_deviceId, flags, ctx);
|
|
#else
|
|
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
|
|
hc::am_memtracker_update(devPtr, device->_deviceId, flags);
|
|
#endif
|
|
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__,
|
|
hostPtr, vecAcc.size());
|
|
};
|
|
};
|
|
if (am_status == AM_SUCCESS) {
|
|
hip_status = hipSuccess;
|
|
} else {
|
|
hip_status = hipErrorOutOfMemory;
|
|
}
|
|
} else {
|
|
hip_status = hipErrorInvalidValue;
|
|
}
|
|
}
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
hipError_t hipHostUnregister(void* hostPtr) {
|
|
HIP_INIT_API(hipHostUnregister, hostPtr);
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
hipError_t hip_status = hipSuccess;
|
|
if (hostPtr == NULL) {
|
|
hip_status = hipErrorInvalidValue;
|
|
} else {
|
|
auto device = ctx->getWriteableDevice();
|
|
am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr);
|
|
tprintf(DB_MEM, " %s unregistered ptr=%p\n", __func__, hostPtr);
|
|
if (am_status != AM_SUCCESS) {
|
|
hip_status = hipErrorHostMemoryNotRegistered;
|
|
}
|
|
}
|
|
return ihipLogStatus(hip_status);
|
|
}
|
|
|
|
namespace hip_impl {
|
|
hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count,
|
|
size_t offset, hipMemcpyKind kind,
|
|
const char* symbol_name) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyToSymbol, (TRACE_MCMD), symbol_name, src,
|
|
count, offset, kind);
|
|
|
|
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
|
|
|
|
if (count == 0) return ihipLogStatus(hipSuccess);
|
|
if (dst == nullptr) {
|
|
return ihipLogStatus(hipErrorInvalidSymbol);
|
|
}
|
|
|
|
if (kind == hipMemcpyDeviceToHost || kind == hipMemcpyHostToHost) {
|
|
return ihipLogStatus(hipErrorInvalidMemcpyDirection);
|
|
} else if (kind == hipMemcpyDeviceToDevice) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(static_cast<char*>(dst)+offset, src, count, kind,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count,
|
|
size_t offset, hipMemcpyKind kind,
|
|
const char* symbol_name) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyFromSymbol, (TRACE_MCMD), symbol_name, dst,
|
|
count, offset, kind);
|
|
|
|
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
|
|
|
|
if (count == 0) return ihipLogStatus(hipSuccess);
|
|
if (src == nullptr || dst == nullptr) {
|
|
return ihipLogStatus(hipErrorInvalidSymbol);
|
|
}
|
|
|
|
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyHostToHost) {
|
|
return ihipLogStatus(hipErrorInvalidMemcpyDirection);
|
|
} else if (kind == hipMemcpyDeviceToDevice) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, static_cast<const char*>(src)+offset, count, kind,
|
|
hipStreamNull));
|
|
}
|
|
|
|
|
|
hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count,
|
|
size_t offset, hipMemcpyKind kind,
|
|
hipStream_t stream, const char* symbol_name) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyToSymbolAsync, (TRACE_MCMD), symbol_name, src,
|
|
count, offset, kind, stream);
|
|
|
|
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
|
|
|
|
if (count == 0) return ihipLogStatus(hipSuccess);
|
|
if (dst == nullptr) {
|
|
return ihipLogStatus(hipErrorInvalidSymbol);
|
|
}
|
|
|
|
if (kind == hipMemcpyDeviceToHost || kind == hipMemcpyHostToHost) {
|
|
return ihipLogStatus(hipErrorInvalidMemcpyDirection);
|
|
} else if (kind == hipMemcpyDeviceToDevice) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
|
|
hipError_t e = hipSuccess;
|
|
if (stream) {
|
|
try {
|
|
hip_internal::memcpyAsync(static_cast<char*>(dst)+offset, src, count, kind, stream);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
} else {
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count,
|
|
size_t offset, hipMemcpyKind kind,
|
|
hipStream_t stream, const char* symbol_name) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyFromSymbolAsync, (TRACE_MCMD), symbol_name,
|
|
dst, count, offset, kind, stream);
|
|
|
|
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src);
|
|
|
|
if (count == 0) return ihipLogStatus(hipSuccess);
|
|
if (src == nullptr || dst == nullptr) {
|
|
return ihipLogStatus(hipErrorInvalidSymbol);
|
|
}
|
|
|
|
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyHostToHost) {
|
|
return ihipLogStatus(hipErrorInvalidMemcpyDirection);
|
|
} else if (kind == hipMemcpyDeviceToDevice) {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
|
|
hipError_t e = hipSuccess;
|
|
stream = ihipSyncAndResolveStream(stream);
|
|
if (stream) {
|
|
try {
|
|
hip_internal::memcpyAsync(dst, static_cast<const char*>(src)+offset, count, kind, stream);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
} else {
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
} // Namespace hip_impl.
|
|
|
|
//---
|
|
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy, (TRACE_MCMD), dst, src, sizeBytes, kind);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes, kind,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyHtoD, (TRACE_MCMD), dst, src, sizeBytes);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes,
|
|
hipMemcpyHostToDevice,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyDtoH, (TRACE_MCMD), dst, src, sizeBytes);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes,
|
|
hipMemcpyDeviceToHost,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyDtoD, (TRACE_MCMD), dst, src, sizeBytes);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes,
|
|
hipMemcpyDeviceToDevice,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyHtoH, (TRACE_MCMD), dst, src, sizeBytes);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes,
|
|
hipMemcpyHostToHost,
|
|
hipStreamNull));
|
|
}
|
|
|
|
hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
|
|
hipMemcpyKind kind, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyWithStream, (TRACE_MCMD), dst, src, sizeBytes,
|
|
kind, stream);
|
|
|
|
return ihipLogStatus(hip_internal::memcpySync(dst, src, sizeBytes, kind,
|
|
stream));
|
|
}
|
|
|
|
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
|
hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyAsync, (TRACE_MCMD), dst, src, sizeBytes, kind, stream);
|
|
|
|
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream));
|
|
}
|
|
|
|
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyHtoDAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
|
|
|
|
return ihipLogStatus(
|
|
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyHostToDevice, stream));
|
|
}
|
|
|
|
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
|
|
hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyDtoDAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
|
|
|
|
return ihipLogStatus(
|
|
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
|
|
}
|
|
|
|
hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyDtoHAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
|
|
|
|
return ihipLogStatus(
|
|
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream));
|
|
}
|
|
|
|
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
|
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy2DToArray, (TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind);
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
size_t byteSize;
|
|
if (dst) {
|
|
switch (dst[0].desc.f) {
|
|
case hipChannelFormatKindSigned:
|
|
byteSize = sizeof(int);
|
|
break;
|
|
case hipChannelFormatKindUnsigned:
|
|
byteSize = sizeof(unsigned int);
|
|
break;
|
|
case hipChannelFormatKindFloat:
|
|
byteSize = sizeof(float);
|
|
break;
|
|
case hipChannelFormatKindNone:
|
|
byteSize = sizeof(size_t);
|
|
break;
|
|
default:
|
|
byteSize = 0;
|
|
break;
|
|
}
|
|
} else {
|
|
return ihipLogStatus(hipErrorUnknown);
|
|
}
|
|
|
|
if ((wOffset + width > (dst->width * byteSize)) || width > spitch) {
|
|
return ihipLogStatus(hipErrorUnknown);
|
|
}
|
|
|
|
size_t src_w = spitch;
|
|
size_t dst_w = (dst->width) * byteSize;
|
|
|
|
try {
|
|
for (int i = 0; i < height; ++i) {
|
|
stream->locked_copySync((unsigned char*)dst->data + i * dst_w,
|
|
(unsigned char*)src + i * src_w, width, kind);
|
|
}
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
|
size_t count, hipMemcpyKind kind) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyToArray, (TRACE_MCMD), dst, wOffset, hOffset, src, count, kind);
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
try {
|
|
stream->locked_copySync((char*)dst->data + wOffset, src, count, kind);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset,
|
|
size_t count, hipMemcpyKind kind) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyFromArray, (TRACE_MCMD), dst, srcArray, wOffset, hOffset, count, kind);
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
try {
|
|
stream->locked_copySync((char*)dst, (char*)srcArray->data + wOffset, count, kind);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyHtoA, (TRACE_MCMD), dstArray, dstOffset, srcHost, count);
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
|
|
hipError_t e = hipSuccess;
|
|
try {
|
|
stream->locked_copySync((char*)dstArray->data + dstOffset, srcHost, count,
|
|
hipMemcpyHostToDevice);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyAtoH, (TRACE_MCMD), dst, srcArray, srcOffset, count);
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
try {
|
|
stream->locked_copySync((char*)dst, (char*)srcArray->data + srcOffset, count,
|
|
hipMemcpyDeviceToHost);
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
int getByteSizeFromFormat(const hipChannelFormatDesc& desc){
|
|
int byteSize =0;
|
|
switch (desc.f) {
|
|
case hipChannelFormatKindUnsigned:
|
|
switch (desc.x) {
|
|
case 32:
|
|
byteSize = sizeof(uint32_t);
|
|
break;
|
|
case 16:
|
|
byteSize = sizeof(uint16_t);
|
|
break;
|
|
case 8:
|
|
byteSize = sizeof(uint8_t);
|
|
break;
|
|
default:
|
|
byteSize = sizeof(uint32_t);
|
|
}
|
|
break;
|
|
case hipChannelFormatKindSigned:
|
|
switch (desc.x) {
|
|
case 32:
|
|
byteSize = sizeof(int32_t);
|
|
break;
|
|
case 16:
|
|
byteSize = sizeof(int16_t);
|
|
break;
|
|
case 8:
|
|
byteSize = sizeof(int8_t);
|
|
break;
|
|
default:
|
|
byteSize = sizeof(int32_t);
|
|
}
|
|
break;
|
|
case hipChannelFormatKindFloat:
|
|
switch (desc.x) {
|
|
case 32:
|
|
byteSize = sizeof(float);
|
|
break;
|
|
case 16:
|
|
byteSize = sizeof(_Float16);
|
|
break;
|
|
default:
|
|
byteSize = sizeof(float);
|
|
}
|
|
break;
|
|
case hipChannelFormatKindNone:
|
|
default:
|
|
break;
|
|
}
|
|
return byteSize;
|
|
}
|
|
|
|
hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) {
|
|
hipError_t e = hipSuccess;
|
|
if(p) {
|
|
size_t dstByteSize, srcByteSize, copyWidth, copyHeight, copyDepth, widthInBytes, srcPitch, dstPitch, srcYsize, dstYsize;
|
|
size_t srcXoffset, srcYoffset, srcZoffset, dstXoffset, dstYoffset, dstZoffset;
|
|
size_t srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth;
|
|
|
|
void* srcPtr, *dstPtr;
|
|
bool copyWidthUpdate= false;
|
|
copyDepth = p->extent.depth;
|
|
copyHeight = p->extent.height;
|
|
copyWidth = p->extent.width; // in bytes ?
|
|
dstXoffset = p->dstPos.x;
|
|
dstYoffset = p->dstPos.y;
|
|
dstZoffset = p->dstPos.z;
|
|
srcXoffset = p->srcPos.x;
|
|
srcYoffset = p->srcPos.y;
|
|
srcZoffset = p->srcPos.z;
|
|
if (copyWidth == 0) return hipSuccess;
|
|
if (p->dstArray != nullptr) {
|
|
if ((p->dstArray->isDrv == true) ||( p->dstPtr.ptr!= nullptr)){
|
|
return hipErrorInvalidValue;
|
|
}
|
|
// Array destination
|
|
dstByteSize = getByteSizeFromFormat(p->dstArray->desc);
|
|
hipChannelFormatDesc desc;
|
|
desc = p->dstArray->desc;
|
|
dstPtr = p->dstArray->data;
|
|
dstWidth = p->dstArray->width;
|
|
dstHeight = p->dstArray->height;
|
|
dstDepth = p->dstArray->depth;
|
|
dstPitch = dstByteSize * alignUp(dstWidth, IMAGE_PITCH_ALIGNMENT);
|
|
if(!copyWidthUpdate) {
|
|
copyWidth = copyWidth * dstByteSize;
|
|
copyWidthUpdate = true;
|
|
}
|
|
} else {
|
|
//Non Array destination
|
|
dstPtr = p->dstPtr.ptr;
|
|
dstWidth = p->dstPtr.xsize;
|
|
dstHeight = p->dstPtr.ysize;
|
|
dstPitch = p->dstPtr.pitch;
|
|
}
|
|
|
|
if (p->srcArray != nullptr) {
|
|
if ((p->srcArray->isDrv == true) ||( p->srcPtr.ptr!= nullptr)){
|
|
return hipErrorInvalidValue;
|
|
}
|
|
// Array source
|
|
srcByteSize = getByteSizeFromFormat(p->srcArray->desc);
|
|
hipChannelFormatDesc desc;
|
|
desc = p->srcArray->desc;
|
|
srcPtr = p->srcArray->data;
|
|
srcWidth = p->srcArray->width;
|
|
srcHeight = p->srcArray->height;
|
|
srcDepth = p->srcArray->depth;
|
|
srcPitch = srcByteSize * alignUp(srcWidth, IMAGE_PITCH_ALIGNMENT);
|
|
if(!copyWidthUpdate) {
|
|
copyWidth = copyWidth * srcByteSize;
|
|
copyWidthUpdate = true;
|
|
}
|
|
} else {
|
|
//Non Array source
|
|
srcPtr = p->srcPtr.ptr;
|
|
srcWidth = p->srcPtr.xsize;
|
|
srcHeight = p->srcPtr.ysize;
|
|
srcPitch = p->srcPtr.pitch;
|
|
}
|
|
|
|
stream = ihipSyncAndResolveStream(stream);
|
|
try {
|
|
if((copyWidth == dstPitch) && (copyWidth == srcPitch)&& (copyHeight == dstHeight) &&(copyHeight == srcHeight)) {
|
|
if(isAsync)
|
|
stream->locked_copyAsync((void*)dstPtr, (void*)srcPtr, copyWidth*copyHeight*copyDepth, p->kind);
|
|
else
|
|
stream->locked_copySync((void*)dstPtr, (void*)srcPtr, copyWidth*copyHeight*copyDepth, p->kind, false);
|
|
} else {
|
|
for (int i = 0; i < copyDepth; i++) {
|
|
for (int j = 0; j < copyHeight; j++) {
|
|
unsigned char* src =
|
|
(unsigned char*)srcPtr + (i + srcZoffset) * srcHeight * srcPitch + (j + srcYoffset) * srcPitch + srcXoffset;
|
|
unsigned char* dst =
|
|
(unsigned char*)dstPtr + (i + dstZoffset) * dstHeight * dstPitch + (j + dstYoffset) * dstPitch + dstXoffset;
|
|
if(isAsync)
|
|
stream->locked_copyAsync(dst, src, copyWidth, p->kind);
|
|
else
|
|
stream->locked_copySync(dst, src, copyWidth, p->kind);
|
|
}
|
|
}
|
|
}
|
|
} catch (ihipException ex) {
|
|
e = ex._code;
|
|
}
|
|
} else {
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
return e;
|
|
}
|
|
|
|
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p);
|
|
hipError_t e = hipSuccess;
|
|
e = ihipMemcpy3D(p, hipStreamNull, false);
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy3DAsync, (TRACE_MCMD), p, stream);
|
|
hipError_t e = hipSuccess;
|
|
e = ihipMemcpy3D(p, stream, true);
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
namespace {
|
|
template <uint32_t block_dim, uint32_t items_per_lane,
|
|
typename RandomAccessIterator, typename N, typename T>
|
|
__global__ void hip_fill_n(RandomAccessIterator f, N n, T value) {
|
|
const auto grid_dim = gridDim.x * blockDim.x * items_per_lane;
|
|
const auto gidx = blockIdx.x * block_dim + threadIdx.x;
|
|
|
|
size_t idx = gidx * items_per_lane;
|
|
while (idx + items_per_lane <= n) {
|
|
for (auto i = 0u; i != items_per_lane; ++i) {
|
|
__builtin_nontemporal_store(value, &f[idx + i]);
|
|
}
|
|
idx += grid_dim;
|
|
}
|
|
|
|
if (gidx < n % grid_dim) {
|
|
__builtin_nontemporal_store(value, &f[n - gidx - 1]);
|
|
}
|
|
}
|
|
|
|
template <typename T, typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
|
|
inline const T& clamp_integer(const T& x, const T& lower, const T& upper) {
|
|
assert(!(upper < lower));
|
|
|
|
return std::min(upper, std::max(x, lower));
|
|
}
|
|
|
|
template <typename T>
|
|
__global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
|
|
|
|
size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
size_t idy = blockIdx.y * blockDim.y + threadIdx.y;
|
|
size_t floorWidth = (width/sizeof(T));
|
|
T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch);
|
|
T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch);
|
|
if((idx < floorWidth) && (idy < height)){
|
|
dstPtr[idx] = srcPtr[idx];
|
|
} else if((idx < width) && (idy < height)){
|
|
size_t bytesToCopy = width - (floorWidth * sizeof(T));
|
|
dstPtr += floorWidth;
|
|
srcPtr += floorWidth;
|
|
__builtin_memcpy(reinterpret_cast<uint8_t*>(dstPtr), reinterpret_cast<const uint8_t*>(srcPtr),bytesToCopy);
|
|
}
|
|
}
|
|
} // namespace
|
|
|
|
//Get the allocated size
|
|
hipError_t ihipMemPtrGetInfo(void* ptr, size_t* size) {
|
|
hipError_t e = hipSuccess;
|
|
if (ptr != nullptr && size != nullptr) {
|
|
*size = 0;
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
|
if (status == AM_SUCCESS) {
|
|
*size = amPointerInfo._sizeBytes;
|
|
} else {
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
} else {
|
|
e = hipErrorInvalidValue;
|
|
}
|
|
return e;
|
|
}
|
|
|
|
template <typename T>
|
|
void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t count) {
|
|
static constexpr uint32_t block_dim = 256;
|
|
static constexpr uint32_t max_write_width = 4 * sizeof(std::uint32_t); // 4 DWORDs
|
|
static constexpr uint32_t items_per_lane = max_write_width / sizeof(T);
|
|
|
|
const uint32_t grid_dim = clamp_integer<size_t>(
|
|
count / (block_dim * items_per_lane), 1, UINT32_MAX);
|
|
|
|
hipLaunchKernelGGL(hip_fill_n<block_dim, items_per_lane>, dim3(grid_dim),
|
|
dim3{block_dim}, 0u, stream, ptr, count, std::move(val));
|
|
}
|
|
|
|
template <typename T>
|
|
void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
|
|
size_t threadsPerBlock_x = 64;
|
|
size_t threadsPerBlock_y = 4;
|
|
uint32_t grid_dim_x = clamp_integer<size_t>( (width+(threadsPerBlock_x*sizeof(T)-1)) / (threadsPerBlock_x*sizeof(T)), 1, UINT32_MAX);
|
|
uint32_t grid_dim_y = clamp_integer<size_t>( (height+(threadsPerBlock_y-1)) / threadsPerBlock_y, 1, UINT32_MAX);
|
|
hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock_x,threadsPerBlock_y), 0u, stream, dst, src,
|
|
width, height, destPitch, srcPitch);
|
|
}
|
|
|
|
typedef enum ihipMemsetDataType {
|
|
ihipMemsetDataTypeChar = 0,
|
|
ihipMemsetDataTypeShort = 1,
|
|
ihipMemsetDataTypeInt = 2
|
|
}ihipMemsetDataType;
|
|
|
|
hipError_t ihipMemsetAsync(void* dst, int value, size_t count, hipStream_t stream, enum ihipMemsetDataType copyDataType) {
|
|
if (count == 0) return hipSuccess;
|
|
if (!dst) return hipErrorInvalidValue;
|
|
|
|
try {
|
|
if (copyDataType == ihipMemsetDataTypeChar) {
|
|
if ((count & 0x3) == 0) {
|
|
// use a faster dword-per-workitem copy:
|
|
value = value & 0xff;
|
|
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
|
|
ihipMemsetKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), value32, count/sizeof(uint32_t));
|
|
} else {
|
|
// use a slow byte-per-workitem copy:
|
|
ihipMemsetKernel<char> (stream, static_cast<char*> (dst), value, count);
|
|
}
|
|
} else if (copyDataType == ihipMemsetDataTypeInt) { // 4 Bytes value
|
|
ihipMemsetKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), value, count);
|
|
} else if (copyDataType == ihipMemsetDataTypeShort) {
|
|
value = value & 0xffff;
|
|
ihipMemsetKernel<uint16_t> (stream, static_cast<uint16_t*> (dst), value, count);
|
|
}
|
|
} catch (...) {
|
|
return hipErrorInvalidValue;
|
|
}
|
|
|
|
if (HIP_API_BLOCKING) {
|
|
tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str());
|
|
stream->locked_wait();
|
|
}
|
|
|
|
return hipSuccess;
|
|
}
|
|
|
|
namespace {
|
|
template<typename T>
|
|
void handleHeadTail(T* dst, std::size_t n_head, std::size_t n_body,
|
|
std::size_t n_tail, hipStream_t stream, int value) {
|
|
struct Cleaner {
|
|
static
|
|
__global__
|
|
void clean(T* p, std::size_t nh, std::size_t nb, int x) noexcept {
|
|
p[(threadIdx.x < nh) ? threadIdx.x : (threadIdx.x - nh + nb)] = x;
|
|
}
|
|
};
|
|
|
|
hipLaunchKernelGGL(Cleaner::clean, 1, n_head + n_tail, 0, stream,
|
|
dst, n_head,
|
|
n_body * sizeof(std::uint32_t) / sizeof(T), value);
|
|
|
|
}
|
|
} // Anonymous namespace.
|
|
|
|
hipError_t ihipMemsetSync(void* dst, int value, size_t count, hipStream_t stream, ihipMemsetDataType copyDataType) {
|
|
if (count == 0) return hipSuccess;
|
|
if (!dst) return hipErrorInvalidValue;
|
|
|
|
try {
|
|
size_t n = count;
|
|
auto aligned_dst{(copyDataType == ihipMemsetDataTypeInt) ? dst :
|
|
reinterpret_cast<void*>(
|
|
hip_impl::round_up_to_next_multiple_nonnegative(
|
|
reinterpret_cast<std::uintptr_t>(dst), 4ul))};
|
|
size_t n_head{};
|
|
size_t n_tail{};
|
|
int original_value = value;
|
|
|
|
switch (copyDataType) {
|
|
case ihipMemsetDataTypeChar:
|
|
value &= 0xff;
|
|
value = (value << 24) | (value << 16) | (value << 8) | value;
|
|
n_head = static_cast<std::uint8_t*>(aligned_dst) -
|
|
static_cast<std::uint8_t*>(dst);
|
|
n -= n_head;
|
|
n /= sizeof(std::uint32_t);
|
|
n_tail = count % sizeof(std::uint32_t);
|
|
break;
|
|
case ihipMemsetDataTypeShort:
|
|
value &= 0xffff;
|
|
value = (value << 16) | value;
|
|
n_head = static_cast<std::uint16_t*>(aligned_dst) -
|
|
static_cast<std::uint16_t*>(dst);
|
|
n = (count - n_head) *
|
|
sizeof(std::uint16_t) / sizeof(std::uint32_t);
|
|
n_tail = ((count - n_head) *
|
|
sizeof(std::uint16_t)) % sizeof(std::uint32_t);
|
|
break;
|
|
default: break;
|
|
}
|
|
|
|
// queue the memset kernel for the remainder of the buffer before the HSA call below
|
|
if (aligned_dst != dst || n_tail != 0) {
|
|
switch (copyDataType) {
|
|
case ihipMemsetDataTypeChar:
|
|
handleHeadTail(static_cast<std::uint8_t*>(dst), n_head, n,
|
|
n_tail, stream, value & 0xff);
|
|
break;
|
|
case ihipMemsetDataTypeShort:
|
|
handleHeadTail(static_cast<std::uint16_t*>(dst), n_head, n,
|
|
n_tail, stream, value & 0xffff);
|
|
break;
|
|
default: break;
|
|
}
|
|
}
|
|
|
|
// The stream must be locked from all other op insertions to guarantee
|
|
// that the following HSA call can complete before any other ops.
|
|
// Flush the stream while locked. Once the stream is empty, we can safely perform
|
|
// the out-of-band HSA call. Lastly, the stream will unlock via RAII.
|
|
if (!stream) stream = ihipSyncAndResolveStream(stream);
|
|
if (!stream) return hipErrorInvalidValue;
|
|
|
|
LockedAccessor_StreamCrit_t crit(stream->criticalData());
|
|
crit->_av.wait(stream->waitMode());
|
|
const auto s = hsa_amd_memory_fill(aligned_dst, value, n);
|
|
if (s != HSA_STATUS_SUCCESS) return hipErrorInvalidValue;
|
|
}
|
|
catch (...) {
|
|
return hipErrorInvalidValue;
|
|
}
|
|
|
|
if (HIP_API_BLOCKING) {
|
|
tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetSync.\n", ToString(stream).c_str());
|
|
stream->locked_wait();
|
|
}
|
|
|
|
return hipSuccess;
|
|
}
|
|
|
|
hipError_t getLockedPointer(void *hostPtr, size_t dataLen, void **devicePtrPtr)
|
|
{
|
|
hc::accelerator acc;
|
|
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
|
if (status == AM_SUCCESS) {
|
|
*devicePtrPtr = static_cast<char*>(amPointerInfo._devicePointer) +
|
|
(static_cast<char*>(hostPtr) - static_cast<char*>(amPointerInfo._hostPointer));
|
|
return(hipSuccess);
|
|
};
|
|
return(hipErrorHostMemoryNotRegistered);
|
|
}
|
|
|
|
// TODO - review and optimize
|
|
hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
|
size_t height, hipMemcpyKind kind) {
|
|
if (height == 0 || width == 0) return hipSuccess;
|
|
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue;
|
|
|
|
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
|
int isLockedOrD2D = 0;
|
|
void *pinnedPtr=NULL;
|
|
void *actualSrc = (void*)src;
|
|
void *actualDest = dst;
|
|
if(kind == hipMemcpyHostToDevice ) {
|
|
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
|
|
isLockedOrD2D = 1;
|
|
actualSrc = pinnedPtr;
|
|
}
|
|
} else if(kind == hipMemcpyDeviceToHost) {
|
|
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
|
|
isLockedOrD2D = 1;
|
|
actualDest = pinnedPtr;
|
|
}
|
|
} else if(kind == hipMemcpyDeviceToDevice) {
|
|
isLockedOrD2D = 1;
|
|
}
|
|
|
|
hc::completion_future marker;
|
|
|
|
hipError_t e = hipSuccess;
|
|
if((width == dpitch) && (width == spitch)) {
|
|
stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false);
|
|
} else {
|
|
try {
|
|
if(!isLockedOrD2D) {
|
|
for (int i = 0; i < height; ++i)
|
|
stream->locked_copySync((unsigned char*)dst + i * dpitch,
|
|
(unsigned char*)src + i * spitch, width, kind);
|
|
} else {
|
|
if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){
|
|
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
|
|
stream->locked_wait();
|
|
}
|
|
}
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
}
|
|
|
|
return e;
|
|
}
|
|
|
|
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
|
size_t height, hipMemcpyKind kind) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy2D, (TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind);
|
|
hipError_t e = hipSuccess;
|
|
e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind);
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t ihipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
|
size_t height, hipMemcpyKind kind, hipStream_t stream) {
|
|
if (height == 0 || width == 0) return hipSuccess;
|
|
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue;
|
|
hipError_t e = hipSuccess;
|
|
int isLockedOrD2D = 0;
|
|
void *pinnedPtr=NULL;
|
|
void *actualSrc = (void*)src;
|
|
void *actualDest = dst;
|
|
stream = ihipSyncAndResolveStream(stream);
|
|
if(kind == hipMemcpyHostToDevice ) {
|
|
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
|
|
isLockedOrD2D = 1;
|
|
actualSrc = pinnedPtr;
|
|
}
|
|
} else if(kind == hipMemcpyDeviceToHost) {
|
|
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
|
|
isLockedOrD2D = 1;
|
|
actualDest = pinnedPtr;
|
|
}
|
|
} else if(kind == hipMemcpyDeviceToDevice) {
|
|
isLockedOrD2D = 1;
|
|
}
|
|
|
|
if((width == dpitch) && (width == spitch)) {
|
|
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
|
|
} else {
|
|
try {
|
|
if(!isLockedOrD2D){
|
|
for (int i = 0; i < height; ++i)
|
|
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
|
(unsigned char*)src + i * spitch, width, kind, stream);
|
|
} else{
|
|
if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){
|
|
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
|
|
}
|
|
}
|
|
} catch (ihipException& ex) {
|
|
e = ex._code;
|
|
}
|
|
}
|
|
|
|
return e;
|
|
}
|
|
|
|
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
|
size_t height, hipMemcpyKind kind, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpy2DAsync, (TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
|
|
hipError_t e = hipSuccess;
|
|
e = ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
|
size_t height, size_t srcXOffsetInBytes, size_t srcYOffset,
|
|
size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind,
|
|
hipStream_t stream, bool isAsync) {
|
|
if (height == 0 || width == 0) return hipSuccess;
|
|
if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){
|
|
return hipErrorInvalidValue;
|
|
} else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){
|
|
return hipErrorInvalidValue;
|
|
}
|
|
src = (void*)((char*)src+ srcYOffset*spitch + srcXOffsetInBytes);
|
|
dst = (void*)((char*)dst+ dstYOffset*dpitch + dstXOffsetInBytes);
|
|
if(isAsync){
|
|
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, hipMemcpyDefault, stream);
|
|
} else{
|
|
return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyDefault);
|
|
}
|
|
}
|
|
|
|
hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) {
|
|
if (pCopy == nullptr) {
|
|
return hipErrorInvalidValue;
|
|
}
|
|
if (pCopy->Height == 0 || pCopy->WidthInBytes == 0) return hipSuccess;
|
|
void* dst; const void* src;
|
|
size_t spitch = pCopy->srcPitch;
|
|
size_t dpitch = pCopy->dstPitch;
|
|
switch(pCopy->srcMemoryType){
|
|
case hipMemoryTypeHost:
|
|
src = pCopy->srcHost;
|
|
break;
|
|
case hipMemoryTypeArray:
|
|
src = pCopy->srcArray->data;
|
|
spitch = pCopy->WidthInBytes;
|
|
break;
|
|
case hipMemoryTypeUnified:
|
|
case hipMemoryTypeDevice:
|
|
src = pCopy->srcDevice;
|
|
break;
|
|
default:
|
|
return hipErrorInvalidValue;
|
|
}
|
|
switch(pCopy->dstMemoryType){
|
|
case hipMemoryTypeHost:
|
|
dst = pCopy->dstHost;
|
|
break;
|
|
case hipMemoryTypeArray:
|
|
dst = pCopy->dstArray->data;
|
|
dpitch = pCopy->WidthInBytes;
|
|
break;
|
|
case hipMemoryTypeUnified:
|
|
case hipMemoryTypeDevice:
|
|
dst = pCopy->dstDevice;
|
|
break;
|
|
default:
|
|
return hipErrorInvalidValue;
|
|
}
|
|
return ihip2dOffsetMemcpy(dst, dpitch, src, spitch, pCopy->WidthInBytes,
|
|
pCopy->Height, pCopy->srcXInBytes, pCopy->srcY,
|
|
pCopy->dstXInBytes, pCopy->dstY, hipMemcpyDefault,
|
|
stream, isAsync);
|
|
}
|
|
|
|
hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), pCopy);
|
|
return ihipLogStatus(ihipMemcpyParam2D(pCopy, hipStreamNull, false));
|
|
}
|
|
|
|
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemcpyParam2DAsync, (TRACE_MCMD), pCopy, stream);
|
|
return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true));
|
|
}
|
|
|
|
hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind ){
|
|
HIP_INIT_SPECIAL_API(hipMemcpy2DFromArray, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind);
|
|
size_t byteSize;
|
|
if(src) {
|
|
switch (src->desc.f) {
|
|
case hipChannelFormatKindSigned:
|
|
byteSize = sizeof(int);
|
|
break;
|
|
case hipChannelFormatKindUnsigned:
|
|
byteSize = sizeof(unsigned int);
|
|
break;
|
|
case hipChannelFormatKindFloat:
|
|
byteSize = sizeof(float);
|
|
break;
|
|
case hipChannelFormatKindNone:
|
|
byteSize = sizeof(size_t);
|
|
break;
|
|
default:
|
|
byteSize = 0;
|
|
break;
|
|
}
|
|
} else {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, hipStreamNull, false));
|
|
}
|
|
|
|
hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){
|
|
HIP_INIT_SPECIAL_API(hipMemcpy2DFromArrayAsync, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind, stream);
|
|
size_t byteSize;
|
|
if (height == 0 || width == 0) return ihipLogStatus(hipSuccess);
|
|
if(src) {
|
|
switch (src->desc.f) {
|
|
case hipChannelFormatKindSigned:
|
|
byteSize = sizeof(int);
|
|
break;
|
|
case hipChannelFormatKindUnsigned:
|
|
byteSize = sizeof(unsigned int);
|
|
break;
|
|
case hipChannelFormatKindFloat:
|
|
byteSize = sizeof(float);
|
|
break;
|
|
case hipChannelFormatKindNone:
|
|
byteSize = sizeof(size_t);
|
|
break;
|
|
default:
|
|
byteSize = 0;
|
|
break;
|
|
}
|
|
} else {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, stream, true));
|
|
}
|
|
|
|
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
|
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemsetAsync, (TRACE_MCMD), dst, value, sizeBytes, stream);
|
|
return ihipLogStatus(ihipMemsetAsync(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) {
|
|
HIP_INIT_SPECIAL_API(hipMemsetD32Async, (TRACE_MCMD), dst, value, count, stream);
|
|
return ihipLogStatus(ihipMemsetAsync(dst, value, count, stream, ihipMemsetDataTypeInt));
|
|
}
|
|
|
|
hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
|
|
HIP_INIT_SPECIAL_API(hipMemset, (TRACE_MCMD), dst, value, sizeBytes);
|
|
return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
|
|
HIP_INIT_SPECIAL_API(hipMemset2D, (TRACE_MCMD), dst, pitch, value, width, height);
|
|
size_t sizeBytes = pitch * height;
|
|
return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream ) {
|
|
HIP_INIT_SPECIAL_API(hipMemset2DAsync, (TRACE_MCMD), dst, pitch, value, width, height, stream);
|
|
size_t sizeBytes = pitch * height;
|
|
return ihipLogStatus(ihipMemsetAsync(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) {
|
|
HIP_INIT_SPECIAL_API(hipMemsetD8, (TRACE_MCMD), dst, value, count);
|
|
return ihipLogStatus(ihipMemsetSync(dst, value, count, nullptr, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count , hipStream_t stream ) {
|
|
HIP_INIT_SPECIAL_API(hipMemsetD8Async, (TRACE_MCMD), dst, value, count, stream);
|
|
return ihipLogStatus(ihipMemsetAsync(dst, value, count, stream, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count){
|
|
HIP_INIT_SPECIAL_API(hipMemsetD16, (TRACE_MCMD), dst, value, count);
|
|
return ihipLogStatus(ihipMemsetSync(dst, value, count, nullptr, ihipMemsetDataTypeShort));
|
|
}
|
|
|
|
hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, hipStream_t stream ){
|
|
HIP_INIT_SPECIAL_API(hipMemsetD16Async, (TRACE_MCMD), dst, value, count, stream);
|
|
return ihipLogStatus(ihipMemsetAsync(dst, value, count, stream, ihipMemsetDataTypeShort));
|
|
}
|
|
|
|
hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) {
|
|
HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count);
|
|
return ihipLogStatus(ihipMemsetSync(dst, value, count, nullptr, ihipMemsetDataTypeInt));
|
|
}
|
|
|
|
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) {
|
|
HIP_INIT_SPECIAL_API(hipMemset3D, (TRACE_MCMD), &pitchedDevPtr, value, &extent);
|
|
size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
|
|
return ihipLogStatus(ihipMemsetSync(pitchedDevPtr.ptr, value, sizeBytes, nullptr, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream ) {
|
|
HIP_INIT_SPECIAL_API(hipMemset3DAsync, (TRACE_MCMD), &pitchedDevPtr, value, &extent);
|
|
size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
|
|
return ihipLogStatus(ihipMemsetAsync(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar));
|
|
}
|
|
|
|
hipError_t hipMemGetInfo(size_t* free, size_t* total) {
|
|
HIP_INIT_API(hipMemGetInfo, free, total);
|
|
|
|
hipError_t e = hipSuccess;
|
|
|
|
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
|
if (ctx) {
|
|
auto device = ctx->getWriteableDevice();
|
|
if (total) {
|
|
*total = device->_props.totalGlobalMem;
|
|
}
|
|
|
|
if (free) {
|
|
if (!device->_driver_node_id) return ihipLogStatus(hipErrorInvalidDevice);
|
|
|
|
std::string fileName = std::string("/sys/class/kfd/kfd/topology/nodes/") + std::to_string(device->_driver_node_id) + std::string("/mem_banks/0/used_memory");
|
|
std::ifstream file;
|
|
file.open(fileName);
|
|
if (!file) return ihipLogStatus(hipErrorFileNotFound);
|
|
|
|
std::string deviceSize;
|
|
size_t deviceMemSize;
|
|
|
|
file >> deviceSize;
|
|
file.close();
|
|
if ((deviceMemSize=strtol(deviceSize.c_str(),NULL,10))){
|
|
*free = device->_props.totalGlobalMem - deviceMemSize;
|
|
// Deduct the amount of memory from the free memory reported from the system
|
|
if (HIP_HIDDEN_FREE_MEM) *free -= (size_t)HIP_HIDDEN_FREE_MEM * 1024 * 1024;
|
|
} else {
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
}
|
|
}
|
|
|
|
} else {
|
|
e = hipErrorInvalidDevice;
|
|
}
|
|
|
|
return ihipLogStatus(e);
|
|
}
|
|
|
|
hipError_t hipMemPtrGetInfo(void* ptr, size_t* size) {
|
|
HIP_INIT_API(hipMemPtrGetInfo, ptr, size);
|
|
|
|
return ihipLogStatus(ihipMemPtrGetInfo(ptr, size));
|
|
}
|
|
|
|
|
|
hipError_t hipFree(void* ptr) {
|
|
HIP_INIT_SPECIAL_API(hipFree, (TRACE_MEM), ptr);
|
|
|
|
hipError_t hipStatus = hipErrorInvalidDevicePointer;
|
|
|
|
if (ptr) {
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
|
if (status == AM_SUCCESS) {
|
|
/*if (amPointerInfo._hostPointer == NULL) */ //TODO: Fix it when there is proper managed memory support
|
|
{
|
|
if (HIP_SYNC_FREE) {
|
|
// Synchronize all devices, all streams
|
|
// to ensure all work has finished on all devices.
|
|
// This is disabled by default.
|
|
for (unsigned i = 0; i < g_deviceCnt; i++) {
|
|
ihipGetPrimaryCtx(i)->locked_waitAllStreams();
|
|
}
|
|
}
|
|
else {
|
|
ihipCtx_t* ctx;
|
|
if (amPointerInfo._appId != -1) {
|
|
#if USE_APP_PTR_FOR_CTX
|
|
ctx = static_cast<ihipCtx_t*>(amPointerInfo._appPtr);
|
|
#else
|
|
ctx = ihipGetPrimaryCtx(amPointerInfo._appId);
|
|
#endif
|
|
} else {
|
|
ctx = ihipGetTlsDefaultCtx();
|
|
}
|
|
// Synchronize to ensure all work has finished on device owning the memory.
|
|
ctx->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
|
// for all activity to finish.
|
|
}
|
|
hc::am_free(ptr);
|
|
hipStatus = hipSuccess;
|
|
}
|
|
}
|
|
} else {
|
|
// free NULL pointer succeeds and is common technique to initialize runtime
|
|
hipStatus = hipSuccess;
|
|
}
|
|
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
|
|
hipError_t hipHostFree(void* ptr) {
|
|
HIP_INIT_SPECIAL_API(hipHostFree, (TRACE_MEM), ptr);
|
|
|
|
hipError_t hipStatus = hipSuccess;
|
|
hipStatus = hip_internal::ihipHostFree(tls, ptr);
|
|
|
|
return ihipLogStatus(hipStatus);
|
|
};
|
|
|
|
|
|
// Deprecated:
|
|
hipError_t hipFreeHost(void* ptr) { return hipHostFree(ptr); }
|
|
|
|
hipError_t hipFreeArray(hipArray* array) {
|
|
HIP_INIT_SPECIAL_API(hipFreeArray, (TRACE_MEM), array);
|
|
|
|
hipError_t hipStatus = hipErrorInvalidDevicePointer;
|
|
|
|
// Synchronize to ensure all work has finished.
|
|
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
|
// for all activity to finish.
|
|
|
|
if (array->data) {
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data);
|
|
if (status == AM_SUCCESS) {
|
|
if (amPointerInfo._hostPointer == NULL) {
|
|
hc::am_free(array->data);
|
|
hipStatus = hipSuccess;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) {
|
|
HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr);
|
|
hipError_t hipStatus = hipSuccess;
|
|
hc::accelerator acc;
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, dptr);
|
|
if (status == AM_SUCCESS) {
|
|
*pbase = amPointerInfo._devicePointer;
|
|
*psize = amPointerInfo._sizeBytes;
|
|
} else
|
|
hipStatus = hipErrorInvalidDevicePointer;
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
|
|
// TODO: IPC implementaiton:
|
|
|
|
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
|
|
HIP_INIT_API(hipIpcGetMemHandle, handle, devPtr);
|
|
hipError_t hipStatus = hipSuccess;
|
|
// Get the size of allocated pointer
|
|
size_t psize = 0u;
|
|
hc::accelerator acc;
|
|
if ((handle == NULL) || (devPtr == NULL)) {
|
|
hipStatus = hipErrorInvalidHandle;
|
|
} else {
|
|
#if (__hcc_workweek__ >= 17332)
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
|
#else
|
|
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
|
#endif
|
|
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, devPtr);
|
|
if (status == AM_SUCCESS) {
|
|
psize = (size_t)amPointerInfo._sizeBytes;
|
|
} else {
|
|
hipStatus = hipErrorInvalidHandle;
|
|
}
|
|
ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*)handle;
|
|
// Save the size of the pointer to hipIpcMemHandle
|
|
iHandle->psize = psize;
|
|
|
|
#if USE_IPC
|
|
// Create HSA ipc memory
|
|
hsa_status_t hsa_status =
|
|
hsa_amd_ipc_memory_create(devPtr, psize, (hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle));
|
|
if (hsa_status != HSA_STATUS_SUCCESS) hipStatus = hipErrorOutOfMemory;
|
|
#else
|
|
hipStatus = hipErrorRuntimeOther;
|
|
#endif
|
|
}
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) {
|
|
HIP_INIT_API(hipIpcOpenMemHandle, devPtr, &handle, flags);
|
|
hipError_t hipStatus = hipSuccess;
|
|
if (devPtr == NULL)
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
|
|
#if USE_IPC
|
|
// Get the current device agent.
|
|
hc::accelerator acc;
|
|
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
|
if (!agent)
|
|
return ihipLogStatus(hipErrorInvalidHandle);
|
|
|
|
ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*)&handle;
|
|
// Attach ipc memory
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
{
|
|
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
|
auto device = ctx->getWriteableDevice();
|
|
// the peerCnt always stores self so make sure the trace actually
|
|
if(hsa_amd_ipc_memory_attach(
|
|
(hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle), iHandle->psize, crit->peerCnt(),
|
|
crit->peerAgents(), devPtr) != HSA_STATUS_SUCCESS)
|
|
return ihipLogStatus(hipErrorRuntimeOther);
|
|
|
|
hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, sizeof(*devPtr), acc, true, true);
|
|
am_status_t am_status = hc::am_memtracker_add(*devPtr,ampi);
|
|
if (am_status != AM_SUCCESS)
|
|
return ihipLogStatus(hipErrorMapFailed);
|
|
|
|
#if USE_APP_PTR_FOR_CTX
|
|
am_status = hc::am_memtracker_update(*devPtr, device->_deviceId, 0, ctx);
|
|
#else
|
|
am_status = hc::am_memtracker_update(*devPtr, device->_deviceId, 0);
|
|
#endif
|
|
if(am_status != AM_SUCCESS)
|
|
return ihipLogStatus(hipErrorMapFailed);
|
|
}
|
|
#else
|
|
hipStatus = hipErrorRuntimeOther;
|
|
#endif
|
|
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
hipError_t hipIpcCloseMemHandle(void* devPtr) {
|
|
HIP_INIT_API(hipIpcCloseMemHandle, devPtr);
|
|
hipError_t hipStatus = hipSuccess;
|
|
if (devPtr == NULL)
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
|
|
#if USE_IPC
|
|
if(hc::am_memtracker_remove(devPtr) != AM_SUCCESS)
|
|
return ihipLogStatus(hipErrorInvalidValue);
|
|
|
|
if (hsa_amd_ipc_memory_detach(devPtr) != HSA_STATUS_SUCCESS)
|
|
return ihipLogStatus(hipErrorInvalidHandle);
|
|
#else
|
|
hipStatus = hipErrorRuntimeOther;
|
|
#endif
|
|
|
|
return ihipLogStatus(hipStatus);
|
|
}
|
|
|
|
// hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle){
|
|
// return hipSuccess;
|
|
// }
|