Merge pull request #755 from gargrahul/fix_memcpy_symb_nonzerodev
Fix hipMemcpyToSymbol not working on GPUs other than device 0
[ROCm/hip commit: b4e4aafc16]
Bu işleme şunda yer alıyor:
@@ -985,10 +985,9 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault ||
|
||||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
|
||||
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
|
||||
// acc.memcpy_symbol(dst, (void*)src, count+offset);
|
||||
stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1018,9 +1017,9 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count,
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
|
||||
if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost ||
|
||||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
|
||||
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
|
||||
stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1052,7 +1051,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_
|
||||
|
||||
if (stream) {
|
||||
try {
|
||||
stream->lockedSymbolCopyAsync(acc, dst, (void*)src, count, offset, kind);
|
||||
hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream);
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
@@ -1088,7 +1087,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
if (stream) {
|
||||
try {
|
||||
stream->lockedSymbolCopyAsync(acc, dst, src, count, offset, kind);
|
||||
hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream);
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
|
||||
@@ -258,12 +258,16 @@ struct Agent_global {
|
||||
uint32_t byte_cnt;
|
||||
};
|
||||
|
||||
inline void track(const Agent_global& x) {
|
||||
inline void track(const Agent_global& x, hsa_agent_t agent) {
|
||||
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(),
|
||||
x.address, x.byte_cnt);
|
||||
|
||||
auto device = ihipGetTlsDefaultCtx()->getWriteableDevice();
|
||||
|
||||
int deviceIndex =0;
|
||||
for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) {
|
||||
if(g_allAgents[deviceIndex] == agent)
|
||||
break;
|
||||
}
|
||||
auto device = ihipGetDevice(deviceIndex - 1);
|
||||
hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true,
|
||||
false);
|
||||
hc::am_memtracker_add(x.address, ptr_info);
|
||||
@@ -276,7 +280,7 @@ inline void track(const Agent_global& x) {
|
||||
}
|
||||
|
||||
template <typename Container = vector<Agent_global>>
|
||||
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
|
||||
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent,
|
||||
hsa_executable_symbol_t x, void* out) {
|
||||
assert(out);
|
||||
|
||||
@@ -286,7 +290,7 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
|
||||
if (t == HSA_SYMBOL_KIND_VARIABLE) {
|
||||
static_cast<Container*>(out)->push_back(Agent_global{name(x), address(x), size(x)});
|
||||
|
||||
track(static_cast<Container*>(out)->back());
|
||||
track(static_cast<Container*>(out)->back(),agent);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle