2
0

added hipHostRegister/UnRegister api

[ROCm/clr commit: 109154f03f]
Este cometimento está contido em:
Aditya Atluri
2016-04-15 10:08:10 -05:00
ascendente 1293709fc0
cometimento 6ee096fbfc
2 ficheiros modificados com 33 adições e 45 eliminações
+19 -20
Ver ficheiro
@@ -103,7 +103,6 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi
*devicePointer = NULL;
}
}
return ihipLogStatus(e);
}
@@ -132,11 +131,8 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
hc::am_memtracker_update(*ptr, device->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
if (crit->peerCnt()) {
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
}
}
}
@@ -173,13 +169,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
}else{
hc::am_memtracker_update(*ptr, device->_device_index, flags);
{
// TODO - allow_access only works for device memory, need to change am_alloc to allocate host directly.
LockedAccessor_DeviceCrit_t crit(device->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
if (crit->peerCnt()) {
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
}
}
}
@@ -246,11 +238,16 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
}
if(device){
if(flags == hipHostRegisterDefault){
#if USE_HCC_LOCK
am_status_t am_status = hc::am_memtracker_host_memory_lock(device->_acc, hostPtr, sizeBytes);
#else
am_status_t am_status = AM_ERROR_MISC;
#endif
am_status_t am_status;
std::vector<hc::accelerator>vecAcc;
for(int i=0;i<g_deviceCnt;i++){
// if(device->_acc.get_is_peer(g_devices[i]._acc)){
//hsa_status_t hsa_status = hsa_amd_memory_lock(hostPtr, sizeBytes, &g_devices[i]._hsa_agent, 1, &srcPtr);
vecAcc.push_back(g_devices[i]._acc);
// }
}
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
// hsa_status_t hsa_status = hsa_amd_memory_lock(hostPtr, sizeBytes, &device->_hsa_agent, 1, &srcPtr);
if(am_status == AM_SUCCESS){
hip_status = hipSuccess;
@@ -272,13 +269,13 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
hipError_t hipHostUnregister(void *hostPtr)
{
HIP_INIT_API(hostPtr);
auto device = ihipGetTlsDefaultDevice();
hipError_t hip_status = hipSuccess;
if(hostPtr == NULL){
hip_status = hipErrorInvalidValue;
}else{
hsa_status_t hsa_status = hsa_amd_memory_unlock(hostPtr);
if(hsa_status != HSA_STATUS_SUCCESS){
am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr);
if(am_status != AM_SUCCESS){
hip_status = hipErrorInvalidValue;
// TODO: Add a different return error. This is not true
}
@@ -448,6 +445,8 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
// TODO - replace with kernel-level for reporting free memory:
size_t deviceMemSize, hostMemSize, userMemSize;
hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
printf ("deviceMemSize=%zu\n", deviceMemSize);
*free = hipDevice->_props.totalGlobalMem - deviceMemSize;
}
+14 -25
Ver ficheiro
@@ -26,40 +26,29 @@ Ad[tx] = Ad[tx] + float(1);
}
int main(){
float *A, *Ad;
float *A, **Ad;
int num_devices;
HIPCHECK(hipGetDeviceCount(&num_devices));
Ad = new float*[num_devices];
const size_t size = N * sizeof(float);
A = (float*)malloc(size*2);
A = (float*)malloc(size);
HIPCHECK(hipHostRegister(A, size, 0));
for(int i=0;i<N;i++){
A[i] = float(1);
}
for(int i=0;i<num_devices;i++){
HIPCHECK(hipSetDevice(i));
HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0));
}
HIPCHECK(hipMalloc(&Ad, size));
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
for(int i=0;i<num_devices;i++){
HIPCHECK(hipSetDevice(i));
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, 0, Ad[i]);
HIPCHECK(hipMemcpyAsync(Ad, A, size, hipMemcpyHostToDevice, stream));
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad);
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipMemcpyAsync(A, Ad, size, hipMemcpyDeviceToHost, stream));
HIPASSERT(A[10] == 2.0f);
HIPCHECK(hipMemcpy(Ad, A, size, hipMemcpyHostToDevice));
hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, 0, Ad);
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipMemcpy(A, Ad, size, hipMemcpyDeviceToHost));
HIPASSERT(A[10] == 3.0f);
}
HIPASSERT(A[10] == 1.0f + float(num_devices));
HIPCHECK(hipHostUnregister(A));
HIPCHECK(hipStreamDestroy(stream));
passed();
}