SWDEV-306642 - [ABI Break] Add texture/surface/device capabilities device struct entries

- alias hipGetDeviceProperties to hipGetDevicePropertiesR0600
- alias hipDeviceProp_t to hipDeviceProp_tR0600
- remove gcnArch from new device property struct
- add new requested struct members

Change-Id: If3f5dbef3d608487d9f6f419285f4bf577ea9bf0


[ROCm/hipother commit: 0ca6451c91]
Этот коммит содержится в:
Jatin Chaudhary
2023-08-02 13:34:19 +01:00
коммит произвёл Rakesh Roy
родитель 17517817de
Коммит 63029a9739
2 изменённых файлов: 128 добавлений и 69 удалений
+11 -11
Просмотреть файл
@@ -32,11 +32,11 @@ THE SOFTWARE.
typedef int hipLaunchParm;
#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
do { \
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
} while (0)
do { \
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
} while (0)
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
#define hipReadModeElementType cudaReadModeElementType
@@ -105,15 +105,15 @@ typedef int hipLaunchParm;
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#ifdef __HIP_DEVICE_COMPILE__
#define abort_() \
{ asm("trap;"); }
#define abort_() \
{ asm("trap;"); }
#undef assert
#define assert(COND) \
{ \
if (!COND) { \
abort_(); \
} \
}
{ \
if (!COND) { \
abort_(); \
} \
}
#endif
#define __clock() clock()
+117 -58
Просмотреть файл
@@ -1980,87 +1980,143 @@ inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int valu
}
inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
if (p_prop == NULL) {
return hipErrorInvalidValue;
return hipErrorInvalidValue;
}
struct cudaDeviceProp cdprop;
cudaError_t cerror;
cerror = cudaGetDeviceProperties(&cdprop, device);
hipError_t error = hipCUDAErrorTohipError(cudaGetDeviceProperties(&cdprop, device));
if (error != hipSuccess) {
return error;
}
strncpy(p_prop->name, cdprop.name, 256);
strncpy(p_prop->uuid.bytes, cdprop.uuid.bytes, 16);
strncpy(p_prop->luid, cdprop.luid, 8);
p_prop->luidDeviceNodeMask = cdprop.luidDeviceNodeMask;
p_prop->totalGlobalMem = cdprop.totalGlobalMem;
p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
p_prop->regsPerBlock = cdprop.regsPerBlock;
p_prop->warpSize = cdprop.warpSize;
p_prop->memPitch = cdprop.memPitch;
p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
for (int i = 0; i < 3; i++) {
p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
}
p_prop->maxThreadsDim[0] = cdprop.maxThreadsDim[0];
p_prop->maxThreadsDim[1] = cdprop.maxThreadsDim[1];
p_prop->maxThreadsDim[2] = cdprop.maxThreadsDim[2];
p_prop->maxGridSize[0] = cdprop.maxGridSize[0];
p_prop->maxGridSize[1] = cdprop.maxGridSize[1];
p_prop->maxGridSize[2] = cdprop.maxGridSize[2];
p_prop->clockRate = cdprop.clockRate;
p_prop->memoryClockRate = cdprop.memoryClockRate;
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
p_prop->totalConstMem = cdprop.totalConstMem;
p_prop->major = cdprop.major;
p_prop->minor = cdprop.minor;
p_prop->textureAlignment = cdprop.textureAlignment;
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
p_prop->deviceOverlap = cdprop.deviceOverlap;
p_prop->multiProcessorCount = cdprop.multiProcessorCount;
p_prop->l2CacheSize = cdprop.l2CacheSize;
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
p_prop->computeMode = cdprop.computeMode;
p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
int ccVers = p_prop->major * 100 + p_prop->minor * 10;
p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
p_prop->arch.hasDoubles = (ccVers >= 130);
p_prop->arch.hasWarpVote = (ccVers >= 120);
p_prop->arch.hasWarpBallot = (ccVers >= 200);
p_prop->arch.hasWarpShuffle = (ccVers >= 300);
p_prop->arch.hasFunnelShift = (ccVers >= 350);
p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
p_prop->arch.has3dGrid = (ccVers >= 200);
p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
p_prop->concurrentKernels = cdprop.concurrentKernels;
p_prop->pciDomainID = cdprop.pciDomainID;
p_prop->pciBusID = cdprop.pciBusID;
p_prop->pciDeviceID = cdprop.pciDeviceID;
p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
p_prop->gcnArch = 0; // Not a GCN arch
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
p_prop->integrated = cdprop.integrated;
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
p_prop->cooperativeMultiDeviceUnmatchedFunc = 0;
p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0;
p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0;
p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0;
p_prop->maxTexture1D = cdprop.maxTexture1D;
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
p_prop->computeMode = cdprop.computeMode;
p_prop->maxTexture1D = cdprop.maxTexture1D;
p_prop->maxTexture1DMipmap = cdprop.maxTexture1DMipmap;
p_prop->maxTexture1DLinear = cdprop.maxTexture1DLinear;
p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
p_prop->maxTexture2DMipmap[0] = cdprop.maxTexture2DMipmap[0];
p_prop->maxTexture2DMipmap[1] = cdprop.maxTexture2DMipmap[1];
p_prop->maxTexture2DLinear[0] = cdprop.maxTexture2DLinear[0];
p_prop->maxTexture2DLinear[1] = cdprop.maxTexture2DLinear[1];
p_prop->maxTexture2DLinear[2] = cdprop.maxTexture2DLinear[2];
p_prop->maxTexture2DGather[0] = cdprop.maxTexture2DGather[0];
p_prop->maxTexture2DGather[1] = cdprop.maxTexture2DGather[1];
p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
p_prop->maxTexture3DAlt[0] = cdprop.maxTexture3DAlt[0];
p_prop->maxTexture3DAlt[1] = cdprop.maxTexture3DAlt[1];
p_prop->maxTexture3DAlt[2] = cdprop.maxTexture3DAlt[2];
p_prop->maxTextureCubemap = cdprop.maxTextureCubemap;
p_prop->maxTexture1DLayered[0] = cdprop.maxTexture1DLayered[0];
p_prop->maxTexture1DLayered[1] = cdprop.maxTexture1DLayered[1];
p_prop->maxTexture2DLayered[0] = cdprop.maxTexture2DLayered[0];
p_prop->maxTexture2DLayered[1] = cdprop.maxTexture2DLayered[1];
p_prop->maxTexture2DLayered[2] = cdprop.maxTexture2DLayered[2];
p_prop->maxTextureCubemapLayered[0] = cdprop.maxTextureCubemapLayered[0];
p_prop->maxTextureCubemapLayered[1] = cdprop.maxTextureCubemapLayered[1];
p_prop->maxSurface1D = cdprop.maxSurface1D;
p_prop->maxSurface2D[0] = cdprop.maxSurface2D[0];
p_prop->maxSurface2D[1] = cdprop.maxSurface2D[1];
p_prop->maxSurface3D[0] = cdprop.maxSurface3D[0];
p_prop->maxSurface3D[1] = cdprop.maxSurface3D[1];
p_prop->maxSurface3D[2] = cdprop.maxSurface3D[2];
p_prop->maxSurface1DLayered[0] = cdprop.maxSurface1DLayered[0];
p_prop->maxSurface1DLayered[1] = cdprop.maxSurface1DLayered[1];
p_prop->maxSurface2DLayered[0] = cdprop.maxSurface2DLayered[0];
p_prop->maxSurface2DLayered[1] = cdprop.maxSurface2DLayered[1];
p_prop->maxSurface2DLayered[2] = cdprop.maxSurface2DLayered[2];
p_prop->maxSurfaceCubemap = cdprop.maxSurfaceCubemap;
p_prop->maxSurfaceCubemapLayered[0] = cdprop.maxSurfaceCubemapLayered[0];
p_prop->maxSurfaceCubemapLayered[1] = cdprop.maxSurfaceCubemapLayered[1];
p_prop->surfaceAlignment = cdprop.surfaceAlignment;
p_prop->concurrentKernels = cdprop.concurrentKernels;
p_prop->ECCEnabled = cdprop.ECCEnabled;
p_prop->pciBusID = cdprop.pciBusID;
p_prop->pciDeviceID = cdprop.pciDeviceID;
p_prop->pciDomainID = cdprop.pciDomainID;
p_prop->tccDriver = cdprop.tccDriver;
p_prop->asyncEngineCount = cdprop.asyncEngineCount;
p_prop->unifiedAddressing = cdprop.unifiedAddressing;
p_prop->memoryClockRate = cdprop.memoryClockRate;
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
p_prop->l2CacheSize = cdprop.l2CacheSize;
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
p_prop->streamPrioritiesSupported = cdprop.streamPrioritiesSupported;
p_prop->globalL1CacheSupported = cdprop.globalL1CacheSupported;
p_prop->localL1CacheSupported = cdprop.localL1CacheSupported;
p_prop->sharedMemPerMultiprocessor = cdprop.sharedMemPerMultiprocessor;
p_prop->regsPerMultiprocessor = cdprop.regsPerMultiprocessor;
p_prop->managedMemory = cdprop.managedMemory;
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
p_prop->multiGpuBoardGroupID = cdprop.multiGpuBoardGroupID;
p_prop->hostNativeAtomicSupported = cdprop.hostNativeAtomicSupported;
p_prop->singleToDoublePrecisionPerfRatio = cdprop.singleToDoublePrecisionPerfRatio;
p_prop->pageableMemoryAccess = cdprop.pageableMemoryAccess;
p_prop->concurrentManagedAccess = cdprop.concurrentManagedAccess;
p_prop->computePreemptionSupported = cdprop.computePreemptionSupported;
p_prop->canUseHostPointerForRegisteredMem = cdprop.canUseHostPointerForRegisteredMem;
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
p_prop->sharedMemPerBlockOptin = cdprop.sharedMemPerBlockOptin;
p_prop->pageableMemoryAccessUsesHostPageTables = cdprop.pageableMemoryAccessUsesHostPageTables;
p_prop->directManagedMemAccessFromHost = cdprop.directManagedMemAccessFromHost;
p_prop->memPitch = cdprop.memPitch;
p_prop->textureAlignment = cdprop.textureAlignment;
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
p_prop->ECCEnabled = cdprop.ECCEnabled;
p_prop->tccDriver = cdprop.tccDriver;
return hipCUDAErrorTohipError(cerror);
#if CUDA_VERSION >= 11010
p_prop->accessPolicyMaxWindowSize = cdprop.accessPolicyMaxWindowSize;
p_prop->maxBlocksPerMultiProcessor = cdprop.maxBlocksPerMultiProcessor;
p_prop->persistingL2CacheMaxSize = cdprop.persistingL2CacheMaxSize;
p_prop->reservedSharedMemPerBlock = cdprop.reservedSharedMemPerBlock;
p_prop->warpSize = cdprop.warpSize;
#endif
#if CUDA_VERSION >= 12000
p_prop->clusterLaunch = cdprop.clusterLaunch;
p_prop->deferredMappingHipArraySupported = cdprop.deferredMappingCudaArraySupported;
p_prop->gpuDirectRDMAFlushWritesOptions = cdprop.gpuDirectRDMAFlushWritesOptions;
p_prop->gpuDirectRDMASupported = cdprop.gpuDirectRDMASupported;
p_prop->gpuDirectRDMAWritesOrdering = cdprop.gpuDirectRDMAWritesOrdering;
p_prop->hostRegisterReadOnlySupported = cdprop.hostRegisterReadOnlySupported;
p_prop->hostRegisterSupported = cdprop.hostRegisterSupported;
p_prop->ipcEventSupported = cdprop.ipcEventSupported;
p_prop->memoryPoolSupportedHandleTypes = cdprop.memoryPoolSupportedHandleTypes;
p_prop->memoryPoolsSupported = cdprop.memoryPoolsSupported;
p_prop->sparseHipArraySupported = cdprop.sparseCudaArraySupported;
p_prop->timelineSemaphoreInteropSupported = cdprop.timelineSemaphoreInteropSupported;
p_prop->unifiedFunctionPointers = cdprop.unifiedFunctionPointers;
#endif
return error;
}
inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
@@ -2188,6 +2244,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeHostRegisterSupported:
cdattr = cudaDevAttrHostRegisterSupported;
break;
case hipDeviceAttributeConcurrentManagedAccess:
cdattr = cudaDevAttrConcurrentManagedAccess;
break;