From d5f73d22aaacecd2697ecd2665e3a8073b70e501 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 1 Aug 2019 13:16:39 +0530 Subject: [PATCH] [nvcc] Populate missing fields in hipGetDeviceProperties Change-Id: Ie90e02674d503e385f144f1ead3d53ff7b49cecc [ROCm/clr commit: b24a4000f8642818318f36df32c0553b610e7883] --- .../include/hip/nvcc_detail/hip_runtime_api.h | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 2c294c91a3..fb4987feee 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -715,17 +715,20 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev struct cudaDeviceProp cdprop; cudaError_t cerror; cerror = cudaGetDeviceProperties(&cdprop, device); + strncpy(p_prop->name, cdprop.name, 256); p_prop->totalGlobalMem = cdprop.totalGlobalMem; p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock; p_prop->regsPerBlock = cdprop.regsPerBlock; p_prop->warpSize = cdprop.warpSize; + p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; for (int i = 0; i < 3; i++) { p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i]; p_prop->maxGridSize[i] = cdprop.maxGridSize[i]; } - p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; p_prop->clockRate = cdprop.clockRate; + p_prop->memoryClockRate = cdprop.memoryClockRate; + p_prop->memoryBusWidth = cdprop.memoryBusWidth; p_prop->totalConstMem = cdprop.totalConstMem; p_prop->major = cdprop.major; p_prop->minor = cdprop.minor; @@ -733,44 +736,38 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev p_prop->l2CacheSize = cdprop.l2CacheSize; p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor; p_prop->computeMode = cdprop.computeMode; - p_prop->canMapHostMemory = cdprop.canMapHostMemory; - p_prop->memoryClockRate = cdprop.memoryClockRate; - p_prop->memoryBusWidth = cdprop.memoryBusWidth; - - // Same as clock-rate: - p_prop->clockInstructionRate = cdprop.clockRate; + p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate: int ccVers = p_prop->major * 100 + p_prop->minor * 10; - p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110); p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110); p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120); p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120); - p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200); - p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120); p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110); - p_prop->arch.hasDoubles = (ccVers >= 130); - p_prop->arch.hasWarpVote = (ccVers >= 120); p_prop->arch.hasWarpBallot = (ccVers >= 200); p_prop->arch.hasWarpShuffle = (ccVers >= 300); p_prop->arch.hasFunnelShift = (ccVers >= 350); - p_prop->arch.hasThreadFenceSystem = (ccVers >= 200); p_prop->arch.hasSyncThreadsExt = (ccVers >= 200); - p_prop->arch.hasSurfaceFuncs = (ccVers >= 200); p_prop->arch.has3dGrid = (ccVers >= 200); p_prop->arch.hasDynamicParallelism = (ccVers >= 350); p_prop->concurrentKernels = cdprop.concurrentKernels; + p_prop->pciDomainID = cdprop.pciDomainID; p_prop->pciBusID = cdprop.pciBusID; p_prop->pciDeviceID = cdprop.pciDeviceID; p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor; p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard; + p_prop->canMapHostMemory = cdprop.canMapHostMemory; + p_prop->gcnArch = 0; // Not a GCN arch + p_prop->integrated = cdprop.integrated; + p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; + p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; p_prop->maxTexture1D = cdprop.maxTexture1D; p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];