added feature for hipHostGetFlags for CUDA and HIP

[ROCm/hip commit: 75952029d6]
This commit is contained in:
Aditya Atluri
2016-03-06 12:17:30 -06:00
rodzic 5346c57411
commit 91dbc3114d
5 zmienionych plików z 103 dodań i 7 usunięć
@@ -687,6 +687,15 @@ hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) ;
*/
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size) ;
/**
* @brief Get flags associated with host pointer
*
* @param[out] flagsPtr Memory location to store flags
* @param[in] hostPtr Host Pointer allocated through hipHostAlloc
* @return Error code
*/
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) ;
/**
* @brief Free memory allocated by the hcc hip memory allocation API.
* This API performs an implicit hipDeviceSynchronize() call.
@@ -128,6 +128,10 @@ inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, u
return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags));
}
inline static hipError_ hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr){
return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr));
}
inline static hipError_t hipFreeHost(void* ptr) {
return hipCUDAErrorTohipError(cudaFreeHost(ptr));
}
+29 -6
Wyświetl plik
@@ -2008,7 +2008,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){
auto device = ihipGetTlsDefaultDevice();
if(device){
if(flags | hipHostAllocDefault){
if(flags & hipHostAllocDefault){
const unsigned am_flags = amHostPinned;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
@@ -2021,7 +2021,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){
}
tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
}
if(flags | hipHostAllocMapped && device->_props.canMapHostMemory == 1){
if(flags & hipHostAllocMapped){
const unsigned am_flags = amHostPinned;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
@@ -2030,10 +2030,10 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){
}else{
#if USE_AM_TRACKER
hc::am_memtracker_update(*ptr, device->_device_index, flags);
void *srcPtr;
hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr);
assert(hsa_status == HSA_STATUS_SUCCESS);
hc::am_memtracker_add(srcPtr, sizeBytes, device->_acc, false);
// void *srcPtr;
// hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr);
// assert(hsa_status == HSA_STATUS_SUCCESS);
// hc::am_memtracker_add(srcPtr, sizeBytes, device->_acc, false);
#endif
}
tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
@@ -2067,6 +2067,29 @@ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){
return ihipLogStatus(hip_status);
}
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
{
std::call_once(hip_initialized, ihipInit);
hipError_t hip_status = hipSuccess;
hc::accelerator acc;
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
if(status == AM_SUCCESS){
*flagsPtr = amPointerInfo._appAllocationFlags;
if(*flagsPtr == 0){
hip_status = hipErrorInvalidValue;
}
else{
hip_status = hipSuccess;
}
tprintf(TRACE_MEM, " %s: host ptr=%p\n", __func__, hostPtr);
}else{
hip_status = hipErrorInvalidValue;
}
return ihipLogStatus(hip_status);
}
//---
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind)
{
+2 -1
Wyświetl plik
@@ -128,6 +128,7 @@ make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp)
make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp)
make_hip_executable (hipHostAlloc hipHostAlloc.cpp)
make_hip_executable (hipStreamL5 hipStreamL5.cpp)
make_hip_executable (hipHostGetFlags hipHostGetFlags.cpp)
target_link_libraries(hipMathFunctionsHost m)
make_test(hip_ballot " " )
@@ -149,7 +150,7 @@ make_test(hipMultiThreadStreams2 " " )
make_test(hipHostAlloc " ")
make_test(hipMemcpy " " )
make_test(hipMemcpyAsync " " )
make_test(hipHostGetFlags " ")
make_test(hipHcc " " )
make_test(hipStreamL5 " ")
@@ -0,0 +1,59 @@
#include"test_common.h"
#include<malloc.h>
#define LEN 1024*1024
#define SIZE LEN*sizeof(float)
__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
Cd[tx] = Ad[tx] + Bd[tx];
}
int main(){
float *A, *B, *C, *D;
float *Ad, *Bd, *Cd, *Dd;
unsigned int FlagA, FlagB, FlagC;
FlagA = hipHostAllocWriteCombined | hipHostAllocMapped;
FlagB = hipHostAllocWriteCombined | hipHostAllocMapped;
FlagC = hipHostAllocMapped;
hipDeviceProp_t prop;
int device;
HIPCHECK(hipGetDevice(&device));
HIPCHECK(hipGetDeviceProperties(&prop, device));
if(prop.canMapHostMemory != 1){
std::cout<<"Exiting..."<<std::endl;
}
HIPCHECK(hipHostAlloc((void**)&A, SIZE, hipHostAllocWriteCombined | hipHostAllocMapped));
HIPCHECK(hipHostAlloc((void**)&B, SIZE, hipHostAllocWriteCombined | hipHostAllocMapped));
HIPCHECK(hipHostAlloc((void**)&C, SIZE, hipHostAllocMapped));
HIPCHECK(hipHostAlloc((void**)&D, SIZE, hipHostAllocDefault));
unsigned int flagA, flagB, flagC;
HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
HIPCHECK(hipHostGetDevicePointer((void**)&Bd, B, 0));
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
HIPCHECK(hipHostGetDevicePointer((void**)&Dd, D, 0));
HIPCHECK(hipHostGetFlags(&flagA, A));
HIPCHECK(hipHostGetFlags(&flagB, B));
HIPCHECK(hipHostGetFlags(&flagC, C));
for(int i=0;i<LEN;i++){
A[i] = 1.0f;
B[i] = 2.0f;
}
dim3 dimGrid(LEN/512,1,1);
dim3 dimBlock(512,1,1);
hipLaunchKernel(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
HIPCHECK(hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost));
HIPASSERT(C[10] == 3.0f);
HIPASSERT(flagA == FlagA);
HIPASSERT(flagB == FlagB);
HIPASSERT(flagC == FlagC);
passed();
}