Add hipHostMallocCoherent, hipHostMallocNonCoherent
Provide per-allocation control over coherent/non-coherent mem. These overrid the default HIP_COHERENT_HOST_ALLOC setting.
Bu işleme şunda yer alıyor:
@@ -111,17 +111,21 @@ enum hipLimit_t
|
||||
|
||||
//! Flags that can be used with hipHostMalloc
|
||||
#define hipHostMallocDefault 0x0
|
||||
#define hipHostMallocPortable 0x1
|
||||
#define hipHostMallocMapped 0x2
|
||||
#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts.
|
||||
#define hipHostMallocMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
|
||||
#define hipHostMallocWriteCombined 0x4
|
||||
#define hipHostMallocCoherent 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
|
||||
#define hipHostMallocNonCoherent 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific allocation.
|
||||
|
||||
|
||||
//! Flags that can be used with hipHostRegister
|
||||
#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable
|
||||
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. HIP only supports one context so this is always assumed true.
|
||||
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts.
|
||||
#define hipHostRegisterMapped 0x2 ///< Map the allocation into the address space for the current device. The device pointer can be obtained with #hipHostGetDevicePointer.
|
||||
#define hipHostRegisterIoMemory 0x4 ///< Not supported.
|
||||
|
||||
|
||||
|
||||
#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield
|
||||
#define hipDeviceScheduleSpin 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may consume more power.
|
||||
#define hipDeviceScheduleYield 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers power and is friendlier to other threads in the system.
|
||||
|
||||
@@ -65,6 +65,8 @@ hipMemcpyHostToHost
|
||||
#define hipHostMallocPortable cudaHostAllocPortable
|
||||
#define hipHostMallocMapped cudaHostAllocMapped
|
||||
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
|
||||
#define hipHostMallocCoherent 0x0
|
||||
#define hipHostMallocNonCoherent 0x0
|
||||
|
||||
#define hipHostRegisterPortable cudaHostRegisterPortable
|
||||
#define hipHostRegisterMapped cudaHostRegisterMapped
|
||||
|
||||
@@ -74,7 +74,7 @@ int HIP_PROFILE_API= 0;
|
||||
std::string HIP_DB_START_API;
|
||||
std::string HIP_DB_STOP_API;
|
||||
int HIP_DB= 0;
|
||||
int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */
|
||||
int HIP_VISIBLE_DEVICES = 0;
|
||||
int HIP_NUM_KERNELS_INFLIGHT = 128;
|
||||
int HIP_WAIT_MODE = 0;
|
||||
|
||||
|
||||
@@ -267,17 +267,36 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
|
||||
}
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined;
|
||||
|
||||
if (flags & ~supportedFlags) {
|
||||
const unsigned supportedFlags = hipHostMallocPortable
|
||||
| hipHostMallocMapped
|
||||
| hipHostMallocWriteCombined
|
||||
| hipHostMallocCoherent
|
||||
| hipHostMallocNonCoherent;
|
||||
|
||||
|
||||
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
|
||||
|
||||
if ((flags & ~supportedFlags) ||
|
||||
((flags & coherencyFlags) == coherencyFlags)) {
|
||||
*ptr = nullptr;
|
||||
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
|
||||
hip_status = hipErrorInvalidValue;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
|
||||
|
||||
unsigned amFlags = 0;
|
||||
if (flags & hipHostMallocCoherent) {
|
||||
amFlags = amHostCoherent;
|
||||
} else if (flags & hipHostMallocNonCoherent) {
|
||||
amFlags = amHostPinned;
|
||||
} else {
|
||||
// depends on env variables:
|
||||
amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
|
||||
}
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
|
||||
*ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host",
|
||||
sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags);
|
||||
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
|
||||
@@ -31,14 +31,19 @@
|
||||
#define LEN 1024*1024
|
||||
#define SIZE LEN*sizeof(float)
|
||||
|
||||
__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
|
||||
__global__ void Add(float *Ad, float *Bd, float *Cd){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
}
|
||||
|
||||
|
||||
__global__ void Set(int *Ad, int val){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
Ad[tx] = val;
|
||||
}
|
||||
|
||||
int main(){
|
||||
float *A, *B, *C;
|
||||
float *Ad, *Bd, *Cd;
|
||||
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
int device;
|
||||
@@ -49,26 +54,72 @@ int main(){
|
||||
failed("Does support HostPinned Memory");
|
||||
}
|
||||
|
||||
HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
|
||||
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
|
||||
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));
|
||||
|
||||
HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
|
||||
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
|
||||
{
|
||||
float *A, *B, *C;
|
||||
float *Ad, *Bd, *Cd;
|
||||
HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped));
|
||||
HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault));
|
||||
HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped));
|
||||
|
||||
for(int i=0;i<LEN;i++){
|
||||
A[i] = 1.0f;
|
||||
B[i] = 2.0f;
|
||||
HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0));
|
||||
HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0));
|
||||
|
||||
for(int i=0;i<LEN;i++){
|
||||
A[i] = 1.0f;
|
||||
B[i] = 2.0f;
|
||||
}
|
||||
|
||||
HIPCHECK(hipMalloc((void**)&Bd, SIZE));
|
||||
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
|
||||
|
||||
dim3 dimGrid(LEN/512,1,1);
|
||||
dim3 dimBlock(512,1,1);
|
||||
|
||||
hipLaunchKernelGGL(Add, dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK(hipHostFree(A));
|
||||
HIPCHECK(hipHostFree(B));
|
||||
HIPCHECK(hipHostFree(C));
|
||||
}
|
||||
|
||||
HIPCHECK(hipMalloc((void**)&Bd, SIZE));
|
||||
HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
|
||||
{
|
||||
int *A, *B;
|
||||
int numElements = 1024*16;
|
||||
size_t sizeBytes = numElements * sizeof (int);
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
HIPCHECK_API(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent|hipHostMallocNonCoherent), hipErrorInvalidValue);
|
||||
|
||||
dim3 dimGrid(LEN/512,1,1);
|
||||
dim3 dimBlock(512,1,1);
|
||||
assert (A == 0);
|
||||
#endif
|
||||
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, Ad, Bd, Cd);
|
||||
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent));
|
||||
hipStream_t s;
|
||||
hipEvent_t e;
|
||||
|
||||
// Init:
|
||||
HIPCHECK(hipStreamCreate(&s));
|
||||
HIPCHECK(hipEventCreateWithFlags(&e, 0));
|
||||
dim3 dimBlock(64,1,1);
|
||||
dim3 dimGrid(numElements/dimBlock.x,1,1);
|
||||
|
||||
// Init array to know state:
|
||||
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, A, -42);
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, A, 13);
|
||||
HIPCHECK(hipEventRecord(e, s));
|
||||
|
||||
// Host waits for event :
|
||||
HIPCHECK(hipEventSynchronize(e));
|
||||
|
||||
// check result?
|
||||
|
||||
HIPCHECK(hipHostMalloc((void**)&B, sizeBytes, hipHostMallocNonCoherent));
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
|
||||
@@ -185,6 +185,8 @@ syn keyword hipFlags hipHostMallocDefault
|
||||
syn keyword hipFlags hipHostMallocPortable
|
||||
syn keyword hipFlags hipHostMallocMapped
|
||||
syn keyword hipFlags hipHostMallocWriteCombined
|
||||
syn keyword hipFlags hipHostMallocCoherent
|
||||
syn keyword hipFlags hipHostMallocNonCoherent
|
||||
|
||||
syn keyword hipFlags hipHostRegisterDefault
|
||||
syn keyword hipFlags hipHostRegisterPortable
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle