SWDEV-306642 - [ABI Break] Add texture/surface/device capabilities device struct entries

- alias hipGetDeviceProperties to hipGetDevicePropertiesR0600
- alias hipDeviceProp_t to hipDeviceProp_tR0600
- remove gcnArch from new device property struct
- add new requested struct members

Change-Id: If3f5dbef3d608487d9f6f419285f4bf577ea9bf0
Цей коміт міститься в:
Jatin Chaudhary
2023-08-02 13:34:19 +01:00
зафіксовано Rakesh Roy
джерело 813907c29d
коміт 2989840511
12 змінених файлів з 854 додано та 430 видалено
+23 -23
Переглянути файл
@@ -93,7 +93,7 @@ enum hip_api_id_t {
HIP_API_ID_hipGetDevice = 78,
HIP_API_ID_hipGetDeviceCount = 79,
HIP_API_ID_hipGetDeviceFlags = 80,
HIP_API_ID_hipGetDeviceProperties = 81,
HIP_API_ID_hipGetDevicePropertiesR0600 = 81,
HIP_API_ID_RESERVED_82 = 82,
HIP_API_ID_hipGetErrorString = 83,
HIP_API_ID_hipGetLastError = 84,
@@ -518,7 +518,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipGetDevice: return "hipGetDevice";
case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount";
case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags";
case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties";
case HIP_API_ID_hipGetDevicePropertiesR0600: return "hipGetDevicePropertiesR0600";
case HIP_API_ID_hipGetErrorString: return "hipGetErrorString";
case HIP_API_ID_hipGetLastError: return "hipGetLastError";
case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel";
@@ -885,7 +885,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice;
if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount;
if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags;
if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties;
if (strcmp("hipGetDevicePropertiesR0600", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0600;
if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString;
if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError;
if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel;
@@ -1213,8 +1213,8 @@ typedef struct hip_api_data_s {
struct {
int* device;
int device__val;
const hipDeviceProp_t* prop;
hipDeviceProp_t prop__val;
const hipDeviceProp_tR0600* prop;
hipDeviceProp_tR0600 prop__val;
} hipChooseDevice;
struct {
dim3 gridDim;
@@ -1628,10 +1628,10 @@ typedef struct hip_api_data_s {
unsigned int flags__val;
} hipGetDeviceFlags;
struct {
hipDeviceProp_t* props;
hipDeviceProp_t props__val;
hipDeviceProp_tR0600* props;
hipDeviceProp_tR0600 props__val;
hipDevice_t device;
} hipGetDeviceProperties;
} hipGetDevicePropertiesR0600;
struct {
hipArray_t* levelArray;
hipArray_t levelArray__val;
@@ -3325,10 +3325,10 @@ typedef struct hip_api_data_s {
cb_data.args.hipArrayGetInfo.flags = (unsigned int*)flags; \
cb_data.args.hipArrayGetInfo.array = (hipArray*)array; \
};
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')]
#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipChooseDevice.device = (int*)device; \
cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)properties; \
cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_tR0600*)properties; \
};
// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \
@@ -3779,10 +3779,10 @@ typedef struct hip_api_data_s {
#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \
};
// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \
cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \
// hipGetDeviceProperties[('hipDeviceProp_tR0600*', 'props'), ('hipDevice_t', 'device')]
#define INIT_hipGetDevicePropertiesR0600_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipGetDevicePropertiesR0600.props = (hipDeviceProp_tR0600*)props; \
cb_data.args.hipGetDevicePropertiesR0600.device = (hipDevice_t)device; \
};
// hipGetErrorString[]
#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \
@@ -5573,7 +5573,7 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = *(data->args.hipArrayGetInfo.flags);
if (data->args.hipArrayGetInfo.array) data->args.hipArrayGetInfo.array__val = *(data->args.hipArrayGetInfo.array);
break;
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')]
case HIP_API_ID_hipChooseDevice:
if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device);
if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop);
@@ -5908,9 +5908,9 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
case HIP_API_ID_hipGetDeviceFlags:
if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags);
break;
// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
case HIP_API_ID_hipGetDeviceProperties:
if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props);
// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'props'), ('hipDevice_t', 'device')]
case HIP_API_ID_hipGetDevicePropertiesR0600:
if (data->args.hipGetDevicePropertiesR0600.props) data->args.hipGetDevicePropertiesR0600.props__val = *(data->args.hipGetDevicePropertiesR0600.props);
break;
// hipGetErrorString[]
case HIP_API_ID_hipGetErrorString:
@@ -7626,11 +7626,11 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceFlags.flags__val); }
oss << ")";
break;
case HIP_API_ID_hipGetDeviceProperties:
oss << "hipGetDeviceProperties(";
if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL";
else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.props__val); }
oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.device);
case HIP_API_ID_hipGetDevicePropertiesR0600:
oss << "hipGetDevicePropertiesR0600(";
if (data->args.hipGetDevicePropertiesR0600.props == NULL) oss << "props=NULL";
else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.props__val); }
oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.device);
oss << ")";
break;
case HIP_API_ID_hipGetErrorString:
+11 -11
Переглянути файл
@@ -32,11 +32,11 @@ THE SOFTWARE.
typedef int hipLaunchParm;
#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
do { \
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
} while (0)
do { \
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
} while (0)
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
#define hipReadModeElementType cudaReadModeElementType
@@ -105,15 +105,15 @@ typedef int hipLaunchParm;
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#ifdef __HIP_DEVICE_COMPILE__
#define abort_() \
{ asm("trap;"); }
#define abort_() \
{ asm("trap;"); }
#undef assert
#define assert(COND) \
{ \
if (!COND) { \
abort_(); \
} \
}
{ \
if (!COND) { \
abort_(); \
} \
}
#endif
#define __clock() clock()
+117 -58
Переглянути файл
@@ -1980,87 +1980,143 @@ inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int valu
}
inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
if (p_prop == NULL) {
return hipErrorInvalidValue;
return hipErrorInvalidValue;
}
struct cudaDeviceProp cdprop;
cudaError_t cerror;
cerror = cudaGetDeviceProperties(&cdprop, device);
hipError_t error = hipCUDAErrorTohipError(cudaGetDeviceProperties(&cdprop, device));
if (error != hipSuccess) {
return error;
}
strncpy(p_prop->name, cdprop.name, 256);
strncpy(p_prop->uuid.bytes, cdprop.uuid.bytes, 16);
strncpy(p_prop->luid, cdprop.luid, 8);
p_prop->luidDeviceNodeMask = cdprop.luidDeviceNodeMask;
p_prop->totalGlobalMem = cdprop.totalGlobalMem;
p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
p_prop->regsPerBlock = cdprop.regsPerBlock;
p_prop->warpSize = cdprop.warpSize;
p_prop->memPitch = cdprop.memPitch;
p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
for (int i = 0; i < 3; i++) {
p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
}
p_prop->maxThreadsDim[0] = cdprop.maxThreadsDim[0];
p_prop->maxThreadsDim[1] = cdprop.maxThreadsDim[1];
p_prop->maxThreadsDim[2] = cdprop.maxThreadsDim[2];
p_prop->maxGridSize[0] = cdprop.maxGridSize[0];
p_prop->maxGridSize[1] = cdprop.maxGridSize[1];
p_prop->maxGridSize[2] = cdprop.maxGridSize[2];
p_prop->clockRate = cdprop.clockRate;
p_prop->memoryClockRate = cdprop.memoryClockRate;
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
p_prop->totalConstMem = cdprop.totalConstMem;
p_prop->major = cdprop.major;
p_prop->minor = cdprop.minor;
p_prop->textureAlignment = cdprop.textureAlignment;
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
p_prop->deviceOverlap = cdprop.deviceOverlap;
p_prop->multiProcessorCount = cdprop.multiProcessorCount;
p_prop->l2CacheSize = cdprop.l2CacheSize;
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
p_prop->computeMode = cdprop.computeMode;
p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
int ccVers = p_prop->major * 100 + p_prop->minor * 10;
p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
p_prop->arch.hasDoubles = (ccVers >= 130);
p_prop->arch.hasWarpVote = (ccVers >= 120);
p_prop->arch.hasWarpBallot = (ccVers >= 200);
p_prop->arch.hasWarpShuffle = (ccVers >= 300);
p_prop->arch.hasFunnelShift = (ccVers >= 350);
p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
p_prop->arch.has3dGrid = (ccVers >= 200);
p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
p_prop->concurrentKernels = cdprop.concurrentKernels;
p_prop->pciDomainID = cdprop.pciDomainID;
p_prop->pciBusID = cdprop.pciBusID;
p_prop->pciDeviceID = cdprop.pciDeviceID;
p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
p_prop->gcnArch = 0; // Not a GCN arch
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
p_prop->integrated = cdprop.integrated;
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
p_prop->cooperativeMultiDeviceUnmatchedFunc = 0;
p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0;
p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0;
p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0;
p_prop->maxTexture1D = cdprop.maxTexture1D;
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
p_prop->computeMode = cdprop.computeMode;
p_prop->maxTexture1D = cdprop.maxTexture1D;
p_prop->maxTexture1DMipmap = cdprop.maxTexture1DMipmap;
p_prop->maxTexture1DLinear = cdprop.maxTexture1DLinear;
p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
p_prop->maxTexture2DMipmap[0] = cdprop.maxTexture2DMipmap[0];
p_prop->maxTexture2DMipmap[1] = cdprop.maxTexture2DMipmap[1];
p_prop->maxTexture2DLinear[0] = cdprop.maxTexture2DLinear[0];
p_prop->maxTexture2DLinear[1] = cdprop.maxTexture2DLinear[1];
p_prop->maxTexture2DLinear[2] = cdprop.maxTexture2DLinear[2];
p_prop->maxTexture2DGather[0] = cdprop.maxTexture2DGather[0];
p_prop->maxTexture2DGather[1] = cdprop.maxTexture2DGather[1];
p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
p_prop->maxTexture3DAlt[0] = cdprop.maxTexture3DAlt[0];
p_prop->maxTexture3DAlt[1] = cdprop.maxTexture3DAlt[1];
p_prop->maxTexture3DAlt[2] = cdprop.maxTexture3DAlt[2];
p_prop->maxTextureCubemap = cdprop.maxTextureCubemap;
p_prop->maxTexture1DLayered[0] = cdprop.maxTexture1DLayered[0];
p_prop->maxTexture1DLayered[1] = cdprop.maxTexture1DLayered[1];
p_prop->maxTexture2DLayered[0] = cdprop.maxTexture2DLayered[0];
p_prop->maxTexture2DLayered[1] = cdprop.maxTexture2DLayered[1];
p_prop->maxTexture2DLayered[2] = cdprop.maxTexture2DLayered[2];
p_prop->maxTextureCubemapLayered[0] = cdprop.maxTextureCubemapLayered[0];
p_prop->maxTextureCubemapLayered[1] = cdprop.maxTextureCubemapLayered[1];
p_prop->maxSurface1D = cdprop.maxSurface1D;
p_prop->maxSurface2D[0] = cdprop.maxSurface2D[0];
p_prop->maxSurface2D[1] = cdprop.maxSurface2D[1];
p_prop->maxSurface3D[0] = cdprop.maxSurface3D[0];
p_prop->maxSurface3D[1] = cdprop.maxSurface3D[1];
p_prop->maxSurface3D[2] = cdprop.maxSurface3D[2];
p_prop->maxSurface1DLayered[0] = cdprop.maxSurface1DLayered[0];
p_prop->maxSurface1DLayered[1] = cdprop.maxSurface1DLayered[1];
p_prop->maxSurface2DLayered[0] = cdprop.maxSurface2DLayered[0];
p_prop->maxSurface2DLayered[1] = cdprop.maxSurface2DLayered[1];
p_prop->maxSurface2DLayered[2] = cdprop.maxSurface2DLayered[2];
p_prop->maxSurfaceCubemap = cdprop.maxSurfaceCubemap;
p_prop->maxSurfaceCubemapLayered[0] = cdprop.maxSurfaceCubemapLayered[0];
p_prop->maxSurfaceCubemapLayered[1] = cdprop.maxSurfaceCubemapLayered[1];
p_prop->surfaceAlignment = cdprop.surfaceAlignment;
p_prop->concurrentKernels = cdprop.concurrentKernels;
p_prop->ECCEnabled = cdprop.ECCEnabled;
p_prop->pciBusID = cdprop.pciBusID;
p_prop->pciDeviceID = cdprop.pciDeviceID;
p_prop->pciDomainID = cdprop.pciDomainID;
p_prop->tccDriver = cdprop.tccDriver;
p_prop->asyncEngineCount = cdprop.asyncEngineCount;
p_prop->unifiedAddressing = cdprop.unifiedAddressing;
p_prop->memoryClockRate = cdprop.memoryClockRate;
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
p_prop->l2CacheSize = cdprop.l2CacheSize;
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
p_prop->streamPrioritiesSupported = cdprop.streamPrioritiesSupported;
p_prop->globalL1CacheSupported = cdprop.globalL1CacheSupported;
p_prop->localL1CacheSupported = cdprop.localL1CacheSupported;
p_prop->sharedMemPerMultiprocessor = cdprop.sharedMemPerMultiprocessor;
p_prop->regsPerMultiprocessor = cdprop.regsPerMultiprocessor;
p_prop->managedMemory = cdprop.managedMemory;
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
p_prop->multiGpuBoardGroupID = cdprop.multiGpuBoardGroupID;
p_prop->hostNativeAtomicSupported = cdprop.hostNativeAtomicSupported;
p_prop->singleToDoublePrecisionPerfRatio = cdprop.singleToDoublePrecisionPerfRatio;
p_prop->pageableMemoryAccess = cdprop.pageableMemoryAccess;
p_prop->concurrentManagedAccess = cdprop.concurrentManagedAccess;
p_prop->computePreemptionSupported = cdprop.computePreemptionSupported;
p_prop->canUseHostPointerForRegisteredMem = cdprop.canUseHostPointerForRegisteredMem;
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
p_prop->sharedMemPerBlockOptin = cdprop.sharedMemPerBlockOptin;
p_prop->pageableMemoryAccessUsesHostPageTables = cdprop.pageableMemoryAccessUsesHostPageTables;
p_prop->directManagedMemAccessFromHost = cdprop.directManagedMemAccessFromHost;
p_prop->memPitch = cdprop.memPitch;
p_prop->textureAlignment = cdprop.textureAlignment;
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
p_prop->ECCEnabled = cdprop.ECCEnabled;
p_prop->tccDriver = cdprop.tccDriver;
return hipCUDAErrorTohipError(cerror);
#if CUDA_VERSION >= 11010
p_prop->accessPolicyMaxWindowSize = cdprop.accessPolicyMaxWindowSize;
p_prop->maxBlocksPerMultiProcessor = cdprop.maxBlocksPerMultiProcessor;
p_prop->persistingL2CacheMaxSize = cdprop.persistingL2CacheMaxSize;
p_prop->reservedSharedMemPerBlock = cdprop.reservedSharedMemPerBlock;
p_prop->warpSize = cdprop.warpSize;
#endif
#if CUDA_VERSION >= 12000
p_prop->clusterLaunch = cdprop.clusterLaunch;
p_prop->deferredMappingHipArraySupported = cdprop.deferredMappingCudaArraySupported;
p_prop->gpuDirectRDMAFlushWritesOptions = cdprop.gpuDirectRDMAFlushWritesOptions;
p_prop->gpuDirectRDMASupported = cdprop.gpuDirectRDMASupported;
p_prop->gpuDirectRDMAWritesOrdering = cdprop.gpuDirectRDMAWritesOrdering;
p_prop->hostRegisterReadOnlySupported = cdprop.hostRegisterReadOnlySupported;
p_prop->hostRegisterSupported = cdprop.hostRegisterSupported;
p_prop->ipcEventSupported = cdprop.ipcEventSupported;
p_prop->memoryPoolSupportedHandleTypes = cdprop.memoryPoolSupportedHandleTypes;
p_prop->memoryPoolsSupported = cdprop.memoryPoolsSupported;
p_prop->sparseHipArraySupported = cdprop.sparseCudaArraySupported;
p_prop->timelineSemaphoreInteropSupported = cdprop.timelineSemaphoreInteropSupported;
p_prop->unifiedFunctionPointers = cdprop.unifiedFunctionPointers;
#endif
return error;
}
inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
@@ -2188,6 +2244,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeHostRegisterSupported:
cdattr = cudaDevAttrHostRegisterSupported;
break;
case hipDeviceAttributeConcurrentManagedAccess:
cdattr = cudaDevAttrConcurrentManagedAccess;
break;
+1
Переглянути файл
@@ -63,6 +63,7 @@ hipFuncSetSharedMemConfig
hipGetDevice
hipGetDeviceCount
hipGetDeviceProperties
hipGetDevicePropertiesR0600
hipGetErrorName
hipGetErrorString
hipGetLastError
+284 -18
Переглянути файл
@@ -23,6 +23,9 @@
#include "hip_internal.hpp"
#include "hip_mempool_impl.hpp"
#undef hipGetDeviceProperties
#undef hipDeviceProp_t
namespace hip {
// ================================================================================================
@@ -139,12 +142,12 @@ Device::~Device() {
graph_mem_pool_->release();
}
if (null_stream_!= nullptr) {
if (null_stream_ != nullptr) {
hip::Stream::Destroy(null_stream_);
}
}
}
} // namespace hip
void ihipDestroyDevice() {
for (auto deviceHandle : g_devices) {
@@ -171,8 +174,7 @@ hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) {
HIP_RETURN(ihipDeviceGet(device, deviceId));
}
hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
@@ -191,8 +193,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
@@ -232,8 +233,7 @@ hipError_t ihipDeviceGetCount(int* count) {
return hipSuccess;
}
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) {
hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
@@ -275,7 +275,7 @@ hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) {
HIP_RETURN(hipSuccess);
}
hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, hipDevice_t device) {
if (props == nullptr) {
return hipErrorInvalidValue;
}
@@ -286,7 +286,278 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
auto* deviceHandle = g_devices[device]->devices()[0];
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max())+1;
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
hipDeviceProp_tR0600 deviceProps = {0};
const auto& info = deviceHandle->info();
const auto& isa = deviceHandle->isa();
::strncpy(deviceProps.name, info.boardName_, sizeof(info.boardName_));
::strncpy(deviceProps.uuid.bytes, info.uuid_, sizeof(info.uuid_));
deviceProps.totalGlobalMem = info.globalMemSize_;
deviceProps.sharedMemPerBlock = info.localMemSizePerCU_;
deviceProps.sharedMemPerMultiprocessor = info.localMemSizePerCU_ * info.numRTCUs_;
deviceProps.regsPerBlock = info.availableRegistersPerCU_;
deviceProps.warpSize = info.wavefrontWidth_;
deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_;
deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0];
deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1];
deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2];
deviceProps.maxGridSize[0] = int32_max;
deviceProps.maxGridSize[1] = uint16_max;
deviceProps.maxGridSize[2] = uint16_max;
deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000;
deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000;
deviceProps.memoryBusWidth = info.globalMemChannels_;
deviceProps.totalConstMem = std::min(info.maxConstantBufferSize_, int32_max);
deviceProps.major = isa.versionMajor();
deviceProps.minor = isa.versionMinor();
deviceProps.multiProcessorCount = info.maxComputeUnits_;
deviceProps.l2CacheSize = info.l2CacheSize_;
deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_;
deviceProps.maxBlocksPerMultiProcessor = int(info.maxThreadsPerCU_ / info.maxWorkGroupSize_);
deviceProps.computeMode = 0;
deviceProps.clockInstructionRate = info.timeStampFrequency_;
deviceProps.arch.hasGlobalInt32Atomics = 1;
deviceProps.arch.hasGlobalFloatAtomicExch = 1;
deviceProps.arch.hasSharedInt32Atomics = 1;
deviceProps.arch.hasSharedFloatAtomicExch = 1;
deviceProps.arch.hasFloatAtomicAdd = 1;
deviceProps.arch.hasGlobalInt64Atomics = 1;
deviceProps.arch.hasSharedInt64Atomics = 1;
deviceProps.hostNativeAtomicSupported = info.pcie_atomics_ ? 1 : 0;
deviceProps.arch.hasDoubles = 1;
deviceProps.arch.hasWarpVote = 1;
deviceProps.arch.hasWarpBallot = 1;
deviceProps.arch.hasWarpShuffle = 1;
deviceProps.arch.hasFunnelShift = 0;
deviceProps.arch.hasThreadFenceSystem = 1;
deviceProps.arch.hasSyncThreadsExt = 0;
deviceProps.arch.hasSurfaceFuncs = 0;
deviceProps.arch.has3dGrid = 1;
deviceProps.arch.hasDynamicParallelism = 0;
deviceProps.concurrentKernels = 1;
deviceProps.pciDomainID = info.pciDomainID;
deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
deviceProps.canMapHostMemory = 1;
deviceProps.regsPerMultiprocessor = info.availableRegistersPerCU_;
sprintf(deviceProps.gcnArchName, "%s", isa.targetId());
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
deviceProps.maxTexture1DLinear =
std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
deviceProps.maxTexture1DMipmap = std::min(16 * info.imageMaxBufferSize_, int32_max);
deviceProps.maxTexture1D = deviceProps.maxSurface1D = std::min(info.image1DMaxWidth_, int32_max);
deviceProps.maxTexture2D[0] = deviceProps.maxSurface2D[0] =
std::min(info.image2DMaxWidth_, int32_max);
deviceProps.maxTexture2D[1] = deviceProps.maxSurface2D[1] =
std::min(info.image2DMaxHeight_, int32_max);
deviceProps.maxTexture3D[0] = deviceProps.maxSurface3D[0] =
std::min(info.image3DMaxWidth_, int32_max);
deviceProps.maxTexture3D[1] = deviceProps.maxSurface3D[1] =
std::min(info.image3DMaxHeight_, int32_max);
deviceProps.maxTexture3D[2] = deviceProps.maxSurface3D[2] =
std::min(info.image3DMaxDepth_, int32_max);
deviceProps.maxTexture1DLayered[0] = deviceProps.maxSurface1DLayered[0] =
std::min(info.image1DAMaxWidth_, int32_max);
deviceProps.maxTexture1DLayered[1] = deviceProps.maxSurface1DLayered[1] =
std::min(info.imageMaxArraySize_, int32_max);
deviceProps.maxTexture2DLayered[0] = deviceProps.maxSurface2DLayered[0] =
std::min(info.image2DAMaxWidth_[0], int32_max);
deviceProps.maxTexture2DLayered[1] = deviceProps.maxSurface2DLayered[1] =
std::min(info.image2DAMaxWidth_[1], int32_max);
deviceProps.maxTexture2DLayered[2] = deviceProps.maxSurface2DLayered[2] =
std::min(info.imageMaxArraySize_, int32_max);
deviceProps.hdpMemFlushCntl = info.hdpMemFlushCntl;
deviceProps.hdpRegFlushCntl = info.hdpRegFlushCntl;
deviceProps.memPitch = std::min(info.maxMemAllocSize_, int32_max);
deviceProps.textureAlignment = deviceProps.surfaceAlignment = info.imageBaseAddressAlignment_;
deviceProps.texturePitchAlignment = info.imagePitchAlignment_;
deviceProps.kernelExecTimeoutEnabled = 0;
deviceProps.ECCEnabled = info.errorCorrectionSupport_ ? 1 : 0;
deviceProps.isLargeBar = info.largeBar_ ? 1 : 0;
deviceProps.asicRevision = info.asicRevision_;
deviceProps.ipcEventSupported = 1;
deviceProps.streamPrioritiesSupported = 1;
deviceProps.multiGpuBoardGroupID = info.deviceTopology_.pcie.device;
// HMM capabilities
deviceProps.asyncEngineCount = info.numAsyncQueues_;
deviceProps.deviceOverlap = (info.numAsyncQueues_ > 0) ? 1 : 0;
deviceProps.unifiedAddressing = info.hmmDirectHostAccess_;
deviceProps.managedMemory = info.hmmSupported_;
deviceProps.concurrentManagedAccess = info.hmmSupported_;
deviceProps.directManagedMemAccessFromHost = info.hmmDirectHostAccess_;
deviceProps.canUseHostPointerForRegisteredMem = info.hostUnifiedMemory_;
deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_;
deviceProps.hostRegisterSupported = info.hostUnifiedMemory_;
deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_;
// Mem pool
deviceProps.memoryPoolsSupported = HIP_MEM_POOL_SUPPORT;
deviceProps.memoryPoolSupportedHandleTypes = 0;
// Caching behavior
deviceProps.globalL1CacheSupported = 1;
deviceProps.localL1CacheSupported = 1;
deviceProps.persistingL2CacheMaxSize = info.l2CacheSize_;
deviceProps.reservedSharedMemPerBlock = 0;
deviceProps.sharedMemPerBlockOptin = 0;
// Unsupported features
// Single to double precision perf ratio
deviceProps.singleToDoublePrecisionPerfRatio = 0;
// Flag hipHostRegisterReadOnly
deviceProps.hostRegisterReadOnlySupported = 0;
// Compute preemption
deviceProps.computePreemptionSupported = 0;
// Cubemaps
deviceProps.maxTextureCubemap = 0;
deviceProps.maxTextureCubemapLayered[0] = 0;
deviceProps.maxTextureCubemapLayered[1] = 0;
deviceProps.maxSurfaceCubemap = 0;
deviceProps.maxSurfaceCubemapLayered[0] = 0;
deviceProps.maxSurfaceCubemapLayered[1] = 0;
// Texture gather ops
deviceProps.maxTexture2DGather[0] = 0;
deviceProps.maxTexture2DGather[1] = 0;
// Textures bound to pitch memory
deviceProps.maxTexture2DLinear[0] = 0;
deviceProps.maxTexture2DLinear[1] = 0;
deviceProps.maxTexture2DLinear[2] = 0;
// Alternate 3D texture
deviceProps.maxTexture3DAlt[0] = 0;
deviceProps.maxTexture3DAlt[1] = 0;
deviceProps.maxTexture3DAlt[2] = 0;
// access policy
deviceProps.accessPolicyMaxWindowSize = 0;
// cluster launch
deviceProps.clusterLaunch = 0;
// Mapping HIP array
deviceProps.deferredMappingHipArraySupported = 0;
// RDMA options
deviceProps.gpuDirectRDMASupported = 0;
deviceProps.gpuDirectRDMAFlushWritesOptions = 0;
deviceProps.gpuDirectRDMAWritesOrdering = 0;
// luid - TODO populate it only on windows
std::memset(deviceProps.luid, 0, sizeof(deviceProps.luid));
deviceProps.luidDeviceNodeMask = 0;
deviceProps.sparseHipArraySupported = 0;
deviceProps.timelineSemaphoreInteropSupported = 0;
deviceProps.unifiedFunctionPointers = 0;
*props = deviceProps;
return hipSuccess;
}
hipError_t hipGetDevicePropertiesR0600(hipDeviceProp_tR0600* props, hipDevice_t device) {
HIP_INIT_API(hipGetDevicePropertiesR0600, props, device);
HIP_RETURN(ihipGetDeviceProperties(props, device));
}
extern "C" typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
///< differ from CUDA CC. See the arch feature flags for portable ways to query
///< feature caps.
int multiProcessorCount; ///< Number of multi-processors (compute units).
int l2CacheSize; ///< L2 cache size.
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int computeMode; ///< Compute mode.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
///< instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int pciDomainID; ///< PCI Domain ID
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
int canMapHostMemory; ///< Check whether HIP can map host memory
int gcnArch; ///< DEPRECATED: use gcnArchName instead
char gcnArchName[256]; ///< AMD GCN Arch Name.
int integrated; ///< APU vs dGPU
int cooperativeLaunch; ///< HIP device supports cooperative launch
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
///< devices
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
int maxTexture1D; ///< Maximum number of elements in 1D images
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
///< elements
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
size_t textureAlignment; ///< Alignment requirement for textures
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
///< pitched memory
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
int ECCEnabled; ///< Device has ECC support enabled
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
///< multiple
/// devices with unmatched shared memories
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
int asicRevision; ///< Revision of the GPU in this device
int managedMemory; ///< Device supports allocating managed memory on this system
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
///< without migration
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
///< the CPU
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
///< page tables
} hipDeviceProp_t;
extern "C" hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
// Removing this API from tracing.
// This API is now in backwards compatibility mode and is not callable from newly compiled apps.
HIP_INIT_VOID();
if (props == nullptr) {
return hipErrorInvalidValue;
}
if (unsigned(device) >= g_devices.size()) {
return hipErrorInvalidDevice;
}
auto* deviceHandle = g_devices[device]->devices()[0];
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
hipDeviceProp_t deviceProps = {0};
const auto& info = deviceHandle->info();
@@ -348,8 +619,9 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
deviceProps.maxTexture1DLinear = std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max);
deviceProps.maxTexture1DLinear =
std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max);
deviceProps.maxTexture2D[0] = std::min(info.image2DMaxWidth_, int32_max);
deviceProps.maxTexture2D[1] = std::min(info.image2DMaxHeight_, int32_max);
deviceProps.maxTexture3D[0] = std::min(info.image3DMaxWidth_, int32_max);
@@ -376,9 +648,3 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
*props = deviceProps;
return hipSuccess;
}
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
HIP_INIT_API(hipGetDeviceProperties, props, device);
HIP_RETURN(ihipGetDeviceProperties(props, device));
}
+373 -316
Переглянути файл
@@ -23,7 +23,6 @@
#include "hip_internal.hpp"
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
HIP_INIT_API(hipChooseDevice, device, properties);
if (device == nullptr || properties == nullptr) {
@@ -35,99 +34,97 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
int count = 0;
HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count));
for (cl_int i = 0; i< count; ++i) {
for (cl_int i = 0; i < count; ++i) {
hipDeviceProp_t currentProp = {0};
cl_uint validPropCount = 0;
cl_uint matchedCount = 0;
hipError_t err = ihipGetDeviceProperties(&currentProp, i);
if (properties->major != 0) {
validPropCount++;
if(currentProp.major >= properties->major) {
if (currentProp.major >= properties->major) {
matchedCount++;
}
}
if (properties->minor != 0) {
validPropCount++;
if(currentProp.minor >= properties->minor) {
if (currentProp.minor >= properties->minor) {
matchedCount++;
}
}
if(properties->totalGlobalMem != 0) {
validPropCount++;
if(currentProp.totalGlobalMem >= properties->totalGlobalMem) {
matchedCount++;
}
if (properties->totalGlobalMem != 0) {
validPropCount++;
if (currentProp.totalGlobalMem >= properties->totalGlobalMem) {
matchedCount++;
}
}
if(properties->sharedMemPerBlock != 0) {
validPropCount++;
if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
matchedCount++;
}
if (properties->sharedMemPerBlock != 0) {
validPropCount++;
if (currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
matchedCount++;
}
}
if(properties->maxThreadsPerBlock != 0) {
validPropCount++;
if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) {
matchedCount++;
}
if (properties->maxThreadsPerBlock != 0) {
validPropCount++;
if (currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock) {
matchedCount++;
}
}
if(properties->totalConstMem != 0) {
validPropCount++;
if(currentProp.totalConstMem >= properties->totalConstMem ) {
matchedCount++;
}
if (properties->totalConstMem != 0) {
validPropCount++;
if (currentProp.totalConstMem >= properties->totalConstMem) {
matchedCount++;
}
}
if(properties->multiProcessorCount != 0) {
validPropCount++;
if(currentProp.multiProcessorCount >=
properties->multiProcessorCount ) {
matchedCount++;
}
if (properties->multiProcessorCount != 0) {
validPropCount++;
if (currentProp.multiProcessorCount >= properties->multiProcessorCount) {
matchedCount++;
}
}
if(properties->maxThreadsPerMultiProcessor != 0) {
validPropCount++;
if(currentProp.maxThreadsPerMultiProcessor >=
properties->maxThreadsPerMultiProcessor ) {
matchedCount++;
}
if (properties->maxThreadsPerMultiProcessor != 0) {
validPropCount++;
if (currentProp.maxThreadsPerMultiProcessor >= properties->maxThreadsPerMultiProcessor) {
matchedCount++;
}
}
if(properties->memoryClockRate != 0) {
validPropCount++;
if(currentProp.memoryClockRate >= properties->memoryClockRate ) {
matchedCount++;
}
if (properties->memoryClockRate != 0) {
validPropCount++;
if (currentProp.memoryClockRate >= properties->memoryClockRate) {
matchedCount++;
}
}
if(properties->memoryBusWidth != 0) {
validPropCount++;
if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) {
matchedCount++;
}
if (properties->memoryBusWidth != 0) {
validPropCount++;
if (currentProp.memoryBusWidth >= properties->memoryBusWidth) {
matchedCount++;
}
}
if(properties->l2CacheSize != 0) {
validPropCount++;
if(currentProp.l2CacheSize >= properties->l2CacheSize ) {
matchedCount++;
}
if (properties->l2CacheSize != 0) {
validPropCount++;
if (currentProp.l2CacheSize >= properties->l2CacheSize) {
matchedCount++;
}
}
if(properties->regsPerBlock != 0) {
validPropCount++;
if(currentProp.regsPerBlock >= properties->regsPerBlock ) {
matchedCount++;
}
if (properties->regsPerBlock != 0) {
validPropCount++;
if (currentProp.regsPerBlock >= properties->regsPerBlock) {
matchedCount++;
}
}
if(properties->maxSharedMemoryPerMultiProcessor != 0) {
validPropCount++;
if(currentProp.maxSharedMemoryPerMultiProcessor >=
properties->maxSharedMemoryPerMultiProcessor ) {
matchedCount++;
}
if (properties->maxSharedMemoryPerMultiProcessor != 0) {
validPropCount++;
if (currentProp.maxSharedMemoryPerMultiProcessor >=
properties->maxSharedMemoryPerMultiProcessor) {
matchedCount++;
}
}
if(properties->warpSize != 0) {
validPropCount++;
if(currentProp.warpSize >= properties->warpSize ) {
matchedCount++;
}
if (properties->warpSize != 0) {
validPropCount++;
if (currentProp.warpSize >= properties->warpSize) {
matchedCount++;
}
}
if(validPropCount == matchedCount) {
if (validPropCount == matchedCount) {
*device = matchedCount > maxMatchedCount ? i : *device;
maxMatchedCount = std::max(matchedCount, maxMatchedCount);
}
@@ -137,7 +134,6 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
}
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
if (pi == nullptr) {
@@ -151,214 +147,282 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
HIP_RETURN(hipErrorInvalidDevice);
}
//FIXME: should we cache the props, or just select from deviceHandle->info_?
// FIXME: should we cache the props, or just select from deviceHandle->info_?
hipDeviceProp_t prop = {0};
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device));
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
switch (attr) {
case hipDeviceAttributeMaxThreadsPerBlock:
*pi = prop.maxThreadsPerBlock;
break;
case hipDeviceAttributeMaxBlockDimX:
*pi = prop.maxThreadsDim[0];
break;
case hipDeviceAttributeMaxBlockDimY:
*pi = prop.maxThreadsDim[1];
break;
case hipDeviceAttributeMaxBlockDimZ:
*pi = prop.maxThreadsDim[2];
break;
case hipDeviceAttributeMaxGridDimX:
*pi = prop.maxGridSize[0];
break;
case hipDeviceAttributeMaxGridDimY:
*pi = prop.maxGridSize[1];
break;
case hipDeviceAttributeMaxGridDimZ:
*pi = prop.maxGridSize[2];
break;
case hipDeviceAttributeMaxSharedMemoryPerBlock:
*pi = prop.sharedMemPerBlock;
break;
case hipDeviceAttributeTotalConstantMemory:
// size_t to int casting
*pi = std::min(prop.totalConstMem, int32_max);
break;
case hipDeviceAttributeWarpSize:
*pi = prop.warpSize;
break;
case hipDeviceAttributeMaxRegistersPerBlock:
*pi = prop.regsPerBlock;
break;
case hipDeviceAttributeClockRate:
*pi = prop.clockRate;
break;
case hipDeviceAttributeWallClockRate:
*pi = g_devices[device]->devices()[0]->info().wallClockFrequency_;
break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop.memoryClockRate;
break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop.memoryBusWidth;
break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop.multiProcessorCount;
break;
case hipDeviceAttributeComputeMode:
*pi = prop.computeMode;
break;
case hipDeviceAttributeL2CacheSize:
*pi = prop.l2CacheSize;
break;
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
*pi = prop.maxThreadsPerMultiProcessor;
break;
case hipDeviceAttributeComputeCapabilityMajor:
*pi = prop.major;
break;
case hipDeviceAttributeComputeCapabilityMinor:
*pi = prop.minor;
break;
case hipDeviceAttributePciBusId:
*pi = prop.pciBusID;
break;
case hipDeviceAttributeConcurrentKernels:
*pi = prop.concurrentKernels;
break;
case hipDeviceAttributePciDeviceId:
*pi = prop.pciDeviceID;
break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
*pi = prop.maxSharedMemoryPerMultiProcessor;
break;
case hipDeviceAttributeIsMultiGpuBoard:
*pi = prop.isMultiGpuBoard;
break;
case hipDeviceAttributeCooperativeLaunch:
*pi = prop.cooperativeLaunch;
break;
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
*pi = prop.cooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeIntegrated:
*pi = prop.integrated;
break;
case hipDeviceAttributeMaxTexture1DWidth:
*pi = prop.maxTexture1D;
break;
case hipDeviceAttributeMaxTexture2DWidth:
*pi = prop.maxTexture2D[0];
break;
case hipDeviceAttributeMaxTexture2DHeight:
*pi = prop.maxTexture2D[1];
break;
case hipDeviceAttributeMaxTexture3DWidth:
*pi = prop.maxTexture3D[0];
break;
case hipDeviceAttributeMaxTexture3DHeight:
*pi = prop.maxTexture3D[1];
break;
case hipDeviceAttributeMaxTexture3DDepth:
*pi = prop.maxTexture3D[2];
break;
case hipDeviceAttributeHdpMemFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
break;
case hipDeviceAttributeHdpRegFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
break;
case hipDeviceAttributeMaxPitch:
// size_t to int casting
*pi = std::min(prop.memPitch, int32_max);
break;
case hipDeviceAttributeTextureAlignment:
*pi = prop.textureAlignment;
break;
case hipDeviceAttributeTexturePitchAlignment:
*pi = prop.texturePitchAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
*pi = prop.kernelExecTimeoutEnabled;
break;
case hipDeviceAttributeCanMapHostMemory:
*pi = prop.canMapHostMemory;
break;
case hipDeviceAttributeEccEnabled:
*pi = prop.ECCEnabled;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
break;
case hipDeviceAttributeAsicRevision:
*pi = prop.asicRevision;
break;
case hipDeviceAttributeManagedMemory:
*pi = prop.managedMemory;
break;
case hipDeviceAttributeDirectManagedMemAccessFromHost:
*pi = prop.directManagedMemAccessFromHost;
break;
case hipDeviceAttributeConcurrentManagedAccess:
*pi = prop.concurrentManagedAccess;
break;
case hipDeviceAttributePageableMemoryAccess:
*pi = prop.pageableMemoryAccess;
break;
case hipDeviceAttributePageableMemoryAccessUsesHostPageTables:
*pi = prop.pageableMemoryAccessUsesHostPageTables;
break;
case hipDeviceAttributeIsLargeBar:
*pi = prop.isLargeBar;
break;
case hipDeviceAttributeUnifiedAddressing:
// HIP runtime always uses SVM for host memory allocations.
// Note: Host registered memory isn't covered by this feature
// and still requires hipMemHostGetDevicePointer() call
*pi = true;
break;
case hipDeviceAttributeCanUseStreamWaitValue:
// hipStreamWaitValue64() and hipStreamWaitValue32() support
*pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_;
break;
case hipDeviceAttributeImageSupport:
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().imageSupport_);
break;
case hipDeviceAttributePhysicalMultiProcessorCount:
*pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_;
break;
case hipDeviceAttributeFineGrainSupport:
*pi = static_cast<int>(g_devices[device]->devices()[0]->isFineGrainSupported());
break;
case hipDeviceAttributeMemoryPoolsSupported:
*pi = HIP_MEM_POOL_SUPPORT;
break;
case hipDeviceAttributeVirtualMemoryManagementSupported:
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().virtualMemoryManagement_);
break;
case hipDeviceAttributeHostRegisterSupported:
*pi = true;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
case hipDeviceAttributeMaxThreadsPerBlock:
*pi = prop.maxThreadsPerBlock;
break;
case hipDeviceAttributeAsyncEngineCount:
*pi = prop.asyncEngineCount;
break;
case hipDeviceAttributeMaxBlockDimX:
*pi = prop.maxThreadsDim[0];
break;
case hipDeviceAttributeMaxBlockDimY:
*pi = prop.maxThreadsDim[1];
break;
case hipDeviceAttributeMaxBlockDimZ:
*pi = prop.maxThreadsDim[2];
break;
case hipDeviceAttributeMaxGridDimX:
*pi = prop.maxGridSize[0];
break;
case hipDeviceAttributeMaxGridDimY:
*pi = prop.maxGridSize[1];
break;
case hipDeviceAttributeMaxGridDimZ:
*pi = prop.maxGridSize[2];
break;
case hipDeviceAttributeMaxSurface1D:
*pi = prop.maxSurface1D;
break;
case hipDeviceAttributeMaxSharedMemoryPerBlock:
*pi = prop.sharedMemPerBlock;
break;
case hipDeviceAttributeSharedMemPerBlockOptin:
*pi = prop.sharedMemPerBlockOptin;
break;
case hipDeviceAttributeSharedMemPerMultiprocessor:
*pi = prop.sharedMemPerMultiprocessor;
break;
case hipDeviceAttributeStreamPrioritiesSupported:
*pi = prop.streamPrioritiesSupported;
break;
case hipDeviceAttributeSurfaceAlignment:
*pi = prop.surfaceAlignment;
break;
case hipDeviceAttributeTotalConstantMemory:
// size_t to int casting
*pi = std::min(prop.totalConstMem, int32_max);
break;
case hipDeviceAttributeTotalGlobalMem:
*pi = std::min(prop.totalGlobalMem, int32_max);
break;
case hipDeviceAttributeWarpSize:
*pi = prop.warpSize;
break;
case hipDeviceAttributeMaxRegistersPerBlock:
*pi = prop.regsPerBlock;
break;
case hipDeviceAttributeClockRate:
*pi = prop.clockRate;
break;
case hipDeviceAttributeWallClockRate:
*pi = g_devices[device]->devices()[0]->info().wallClockFrequency_;
break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop.memoryClockRate;
break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop.memoryBusWidth;
break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop.multiProcessorCount;
break;
case hipDeviceAttributeComputeMode:
*pi = prop.computeMode;
break;
case hipDeviceAttributeComputePreemptionSupported:
*pi = prop.computePreemptionSupported;
break;
case hipDeviceAttributeL2CacheSize:
*pi = prop.l2CacheSize;
break;
case hipDeviceAttributeLocalL1CacheSupported:
*pi = prop.localL1CacheSupported;
break;
case hipDeviceAttributeLuidDeviceNodeMask:
*pi = prop.luidDeviceNodeMask;
break;
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
*pi = prop.maxThreadsPerMultiProcessor;
break;
case hipDeviceAttributeComputeCapabilityMajor:
*pi = prop.major;
break;
case hipDeviceAttributeComputeCapabilityMinor:
*pi = prop.minor;
break;
case hipDeviceAttributeMultiGpuBoardGroupID:
*pi = prop.multiGpuBoardGroupID;
break;
case hipDeviceAttributePciBusId:
*pi = prop.pciBusID;
break;
case hipDeviceAttributeConcurrentKernels:
*pi = prop.concurrentKernels;
break;
case hipDeviceAttributePciDeviceId:
*pi = prop.pciDeviceID;
break;
case hipDeviceAttributePciDomainID:
*pi = prop.pciDomainID;
break;
case hipDeviceAttributePersistingL2CacheMaxSize:
*pi = prop.persistingL2CacheMaxSize;
break;
case hipDeviceAttributeMaxRegistersPerMultiprocessor:
*pi = prop.regsPerMultiprocessor;
break;
case hipDeviceAttributeReservedSharedMemPerBlock:
*pi = prop.reservedSharedMemPerBlock;
break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
*pi = prop.maxSharedMemoryPerMultiProcessor;
break;
case hipDeviceAttributeIsMultiGpuBoard:
*pi = prop.isMultiGpuBoard;
break;
case hipDeviceAttributeCooperativeLaunch:
*pi = prop.cooperativeLaunch;
break;
case hipDeviceAttributeHostRegisterSupported:
*pi = 1; // AMD GPUs allow you to register host memory regardless of the GPU
break;
case hipDeviceAttributeDeviceOverlap:
*pi = prop.asyncEngineCount > 0 ? 1 : 0;
break;
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
*pi = prop.cooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeIntegrated:
*pi = prop.integrated;
break;
case hipDeviceAttributeMaxTexture1DWidth:
*pi = prop.maxTexture1D;
break;
case hipDeviceAttributeMaxTexture1DLinear:
*pi = prop.maxTexture1DLinear;
break;
case hipDeviceAttributeMaxTexture1DMipmap:
*pi = prop.maxTexture1DMipmap;
break;
case hipDeviceAttributeMaxTextureCubemap:
*pi = prop.maxTextureCubemap;
break;
case hipDeviceAttributeMaxTexture2DWidth:
*pi = prop.maxTexture2D[0];
break;
case hipDeviceAttributeMaxTexture2DHeight:
*pi = prop.maxTexture2D[1];
break;
case hipDeviceAttributeMaxTexture3DWidth:
*pi = prop.maxTexture3D[0];
break;
case hipDeviceAttributeMaxTexture3DHeight:
*pi = prop.maxTexture3D[1];
break;
case hipDeviceAttributeMaxTexture3DDepth:
*pi = prop.maxTexture3D[2];
break;
case hipDeviceAttributeHdpMemFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
break;
case hipDeviceAttributeHdpRegFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
break;
case hipDeviceAttributeMaxPitch:
// size_t to int casting
*pi = std::min(prop.memPitch, int32_max);
break;
case hipDeviceAttributeTextureAlignment:
*pi = prop.textureAlignment;
break;
case hipDeviceAttributeTexturePitchAlignment:
*pi = prop.texturePitchAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
*pi = prop.kernelExecTimeoutEnabled;
break;
case hipDeviceAttributeCanMapHostMemory:
*pi = prop.canMapHostMemory;
break;
case hipDeviceAttributeCanUseHostPointerForRegisteredMem:
*pi = prop.canUseHostPointerForRegisteredMem;
break;
case hipDeviceAttributeEccEnabled:
*pi = prop.ECCEnabled;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
break;
case hipDeviceAttributeAsicRevision:
*pi = prop.asicRevision;
break;
case hipDeviceAttributeManagedMemory:
*pi = prop.managedMemory;
break;
case hipDeviceAttributeMaxBlocksPerMultiProcessor:
*pi = prop.maxBlocksPerMultiProcessor;
break;
case hipDeviceAttributeDirectManagedMemAccessFromHost:
*pi = prop.directManagedMemAccessFromHost;
break;
case hipDeviceAttributeGlobalL1CacheSupported:
*pi = prop.globalL1CacheSupported;
break;
case hipDeviceAttributeHostNativeAtomicSupported:
*pi = prop.hostNativeAtomicSupported;
break;
case hipDeviceAttributeConcurrentManagedAccess:
*pi = prop.concurrentManagedAccess;
break;
case hipDeviceAttributePageableMemoryAccess:
*pi = prop.pageableMemoryAccess;
break;
case hipDeviceAttributePageableMemoryAccessUsesHostPageTables:
*pi = prop.pageableMemoryAccessUsesHostPageTables;
break;
case hipDeviceAttributeIsLargeBar:
*pi = prop.isLargeBar;
break;
case hipDeviceAttributeUnifiedAddressing:
// HIP runtime always uses SVM for host memory allocations.
// Note: Host registered memory isn't covered by this feature
// and still requires hipMemHostGetDevicePointer() call
*pi = true;
break;
case hipDeviceAttributeCanUseStreamWaitValue:
// hipStreamWaitValue64() and hipStreamWaitValue32() support
*pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_;
break;
case hipDeviceAttributeImageSupport:
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().imageSupport_);
break;
case hipDeviceAttributePhysicalMultiProcessorCount:
*pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_;
break;
case hipDeviceAttributeFineGrainSupport:
*pi = static_cast<int>(g_devices[device]->devices()[0]->isFineGrainSupported());
break;
case hipDeviceAttributeMemoryPoolsSupported:
*pi = HIP_MEM_POOL_SUPPORT;
break;
case hipDeviceAttributeVirtualMemoryManagementSupported:
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().virtualMemoryManagement_);
break;
default:
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusIdstr) {
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr);
if (device == nullptr || pciBusIdstr == nullptr) {
@@ -369,9 +433,9 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
int pciDeviceID = -1;
int pciDomainID = -1;
bool found = false;
if (sscanf (pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast<unsigned int*>(&pciDomainID),
reinterpret_cast<unsigned int*>(&pciBusID),
reinterpret_cast<unsigned int*>(&pciDeviceID)) == 0x3) {
if (sscanf(pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast<unsigned int*>(&pciDomainID),
reinterpret_cast<unsigned int*>(&pciBusID),
reinterpret_cast<unsigned int*>(&pciDeviceID)) == 0x3) {
int count = 0;
HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count));
for (cl_int i = 0; i < count; i++) {
@@ -380,8 +444,8 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
HIP_RETURN_ONFAIL(ihipDeviceGet(&dev, i));
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, dev));
if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID)
&& (pciDeviceID == prop.pciDeviceID)) {
if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID) &&
(pciDeviceID == prop.pciDeviceID)) {
*device = i;
found = true;
break;
@@ -395,10 +459,10 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig) {
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
if(cacheConfig == nullptr) {
if (cacheConfig == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -407,8 +471,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
if (pValue == nullptr || limit >= hipLimitRange) {
@@ -431,8 +494,7 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device) {
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
int count;
@@ -442,24 +504,19 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
HIP_RETURN(hipErrorInvalidDevice);
}
//pciBusId should be large enough to store 13 characters including the NULL-terminator.
// pciBusId should be large enough to store 13 characters including the NULL-terminator.
if (pciBusId == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
hipDeviceProp_t prop;
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device));
auto* deviceHandle = g_devices[device]->devices()[0];
snprintf (pciBusId, len, "%04x:%02x:%02x.%01x",
prop.pciDomainID,
prop.pciBusID,
prop.pciDeviceID,
deviceHandle->info().deviceTopology_.pcie.function);
snprintf(pciBusId, len, "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
HIP_RETURN(len <= 12 ? hipErrorInvalidValue : hipSuccess);
}
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig) {
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
if (pConfig == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
@@ -469,7 +526,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceReset ( void ) {
hipError_t hipDeviceReset(void) {
HIP_INIT_API(hipDeviceReset);
hip::getCurrentDevice()->Reset();
@@ -477,7 +534,7 @@ hipError_t hipDeviceReset ( void ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
// No way to set cache config yet.
@@ -485,34 +542,33 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) {
hipError_t hipDeviceSetLimit(hipLimit_t limit, size_t value) {
HIP_INIT_API(hipDeviceSetLimit, limit, value);
if (limit >= hipLimitRange) {
HIP_RETURN(hipErrorInvalidValue);
}
switch(limit) {
case hipLimitStackSize :
// need to query device size and take action
if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) {
HIP_RETURN(hipErrorInvalidValue);
}
break;
case hipLimitMallocHeapSize:
if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) {
HIP_RETURN(hipErrorInvalidValue);
}
break;
default:
LogPrintfError("UnsupportedLimit = %d is passed", limit);
HIP_RETURN(hipErrorUnsupportedLimit);
switch (limit) {
case hipLimitStackSize:
// need to query device size and take action
if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) {
HIP_RETURN(hipErrorInvalidValue);
}
break;
case hipLimitMallocHeapSize:
if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) {
HIP_RETURN(hipErrorInvalidValue);
}
break;
default:
LogPrintfError("UnsupportedLimit = %d is passed", limit);
HIP_RETURN(hipErrorUnsupportedLimit);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
if (config != hipSharedMemBankSizeDefault &&
config != hipSharedMemBankSizeFourByte &&
if (config != hipSharedMemBankSizeDefault && config != hipSharedMemBankSizeFourByte &&
config != hipSharedMemBankSizeEightByte) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -530,13 +586,13 @@ hipError_t hipDeviceSynchronize() {
int ihipGetDevice() {
hip::Device* device = hip::getCurrentDevice();
if(device == nullptr){
if (device == nullptr) {
return -1;
}
return device->deviceId();
}
hipError_t hipGetDevice ( int* deviceId ) {
hipError_t hipGetDevice(int* deviceId) {
HIP_INIT_API(hipGetDevice, deviceId);
if (deviceId != nullptr) {
@@ -551,13 +607,13 @@ hipError_t hipGetDevice ( int* deviceId ) {
}
}
hipError_t hipGetDeviceCount ( int* count ) {
hipError_t hipGetDeviceCount(int* count) {
HIP_INIT_API_NO_RETURN(hipGetDeviceCount, count);
HIP_RETURN(ihipDeviceGetCount(count));
}
hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
hipError_t hipGetDeviceFlags(unsigned int* flags) {
HIP_INIT_API(hipGetDeviceFlags, flags);
if (flags == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
@@ -566,7 +622,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipSetDevice ( int device ) {
hipError_t hipSetDevice(int device) {
HIP_INIT_API_NO_RETURN(hipSetDevice, device);
if (static_cast<unsigned int>(device) < g_devices.size()) {
hip::setCurrentDevice(device);
@@ -578,7 +634,7 @@ hipError_t hipSetDevice ( int device ) {
HIP_RETURN(hipErrorInvalidDevice);
}
hipError_t hipSetDeviceFlags ( unsigned int flags ) {
hipError_t hipSetDeviceFlags(unsigned int flags) {
HIP_INIT_API(hipSetDeviceFlags, flags);
if (g_devices.empty()) {
HIP_RETURN(hipErrorNoDevice);
@@ -590,9 +646,10 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
// Only one scheduling flag allowed a time
uint32_t scheduleFlag = flags & hipDeviceScheduleMask;
if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield)
&& ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync)
&& ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) {
if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) &&
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield) &&
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync) &&
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -625,7 +682,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
HIP_RETURN(hipSuccess);
}
hipError_t hipSetValidDevices ( int* device_arr, int len ) {
hipError_t hipSetValidDevices(int* device_arr, int len) {
HIP_INIT_API(hipSetValidDevices, device_arr, len);
assert(0 && "Unimplemented");
+8 -1
Переглянути файл
@@ -525,4 +525,11 @@ global:
hipArray3DGetDescriptor;
local:
*;
} hip_5.5;
} hip_5.5;
hip_6.0 {
global:
hipGetDevicePropertiesR0600;
local:
*;
} hip_5.6;
+1 -1
Переглянути файл
@@ -151,7 +151,7 @@ hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
// Linear layout access is supported if P2P is enabled
// Opaque Images are supported only on homogeneous systems
// Might have more conditions to check, in future.
if (srcDeviceProp.gcnArch == dstDeviceProp.gcnArch) {
if (std::string(srcDeviceProp.gcnArchName) == std::string(dstDeviceProp.gcnArchName)) {
HIP_RETURN_ONFAIL(canAccessPeer(value, srcDevice, dstDevice));
} else {
*value = 0;
+6 -1
Переглянути файл
@@ -76,7 +76,12 @@ bool RTCProgram::findIsa() {
}
void* sym_hipGetDevice = amd::Os::getSymbol(handle, "hipGetDevice");
void* sym_hipGetDeviceProperties = amd::Os::getSymbol(handle, "hipGetDeviceProperties");
void* sym_hipGetDeviceProperties =
amd::Os::getSymbol(handle, "hipGetDevicePropertiesR0600"); // Try to find the new symbol
if (sym_hipGetDeviceProperties == nullptr) {
sym_hipGetDeviceProperties =
amd::Os::getSymbol(handle, "hipGetDeviceProperties"); // Fall back to old one
}
if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) {
LogInfo("ISA cannot be found to dlsym failure");
+6 -1
Переглянути файл
@@ -344,6 +344,9 @@ struct Info : public amd::EmbeddedObject {
//! Max width of 2D image in pixels.
size_t image2DMaxWidth_;
//! Max width of 2DA image in pixels.
size_t image2DAMaxWidth_[2];
//! Max height of 2D image in pixels.
size_t image2DMaxHeight_;
@@ -480,6 +483,9 @@ struct Info : public amd::EmbeddedObject {
//! Returns max number of pixels for a 1D image
size_t image1DMaxWidth_;
//! Returns max number of pixels for a 1DA image
size_t image1DAMaxWidth_;
//! Returns max number of pixels for a 1D image created from a buffer object
size_t imageMaxBufferSize_;
@@ -1526,7 +1532,6 @@ class Isa {
uint32_t memChannelBankWidth_; //!< Memory channel bank width.
uint32_t localMemSizePerCU_; //!< Local memory size per CU.
uint32_t localMemBanks_; //!< Number of banks of local memory.
}; // class Isa
/*! \addtogroup Runtime
+3
Переглянути файл
@@ -563,6 +563,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.maxMemAllocSize_ / kPixelRgbaSize);
info_.image1DMaxWidth_ = maxTextureSize;
info_.imageMaxArraySize_ = MaxImageArraySize;
info_.image2DAMaxWidth_[0] = MaxImageArraySize;
info_.image2DAMaxWidth_[1] = MaxImageArraySize;
info_.image1DAMaxWidth_ = maxTextureSize;
info_.preferredInteropUserSync_ = true;
info_.printfBufferSize_ = PrintfDbg::WorkitemDebugSize * info().maxWorkGroupSize_;
+21
Переглянути файл
@@ -1521,6 +1521,27 @@ bool Device::populateOCLDeviceConstants() {
info_.imageMaxArraySize_ = max_array_size;
uint32_t max_image1da_width = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS),
&max_image1da_width)) {
return false;
}
info_.image1DAMaxWidth_ = max_image1da_width;
uint32_t max_image2da_width[2] = {0, 0};
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS),
&max_image2da_width)) {
return false;
}
info_.image2DAMaxWidth_[0] = max_image2da_width[0];
info_.image2DAMaxWidth_[1] = max_image2da_width[1];
uint32_t max_image1d_width = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,