SWDEV-306642 - [ABI Break] Add texture/surface/device capabilities device struct entries
- alias hipGetDeviceProperties to hipGetDevicePropertiesR0600 - alias hipDeviceProp_t to hipDeviceProp_tR0600 - remove gcnArch from new device property struct - add new requested struct members Change-Id: If3f5dbef3d608487d9f6f419285f4bf577ea9bf0
Цей коміт міститься в:
зафіксовано
Rakesh Roy
джерело
813907c29d
коміт
2989840511
@@ -93,7 +93,7 @@ enum hip_api_id_t {
|
||||
HIP_API_ID_hipGetDevice = 78,
|
||||
HIP_API_ID_hipGetDeviceCount = 79,
|
||||
HIP_API_ID_hipGetDeviceFlags = 80,
|
||||
HIP_API_ID_hipGetDeviceProperties = 81,
|
||||
HIP_API_ID_hipGetDevicePropertiesR0600 = 81,
|
||||
HIP_API_ID_RESERVED_82 = 82,
|
||||
HIP_API_ID_hipGetErrorString = 83,
|
||||
HIP_API_ID_hipGetLastError = 84,
|
||||
@@ -518,7 +518,7 @@ static inline const char* hip_api_name(const uint32_t id) {
|
||||
case HIP_API_ID_hipGetDevice: return "hipGetDevice";
|
||||
case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount";
|
||||
case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags";
|
||||
case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties";
|
||||
case HIP_API_ID_hipGetDevicePropertiesR0600: return "hipGetDevicePropertiesR0600";
|
||||
case HIP_API_ID_hipGetErrorString: return "hipGetErrorString";
|
||||
case HIP_API_ID_hipGetLastError: return "hipGetLastError";
|
||||
case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel";
|
||||
@@ -885,7 +885,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
||||
if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice;
|
||||
if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount;
|
||||
if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags;
|
||||
if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties;
|
||||
if (strcmp("hipGetDevicePropertiesR0600", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0600;
|
||||
if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString;
|
||||
if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError;
|
||||
if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel;
|
||||
@@ -1213,8 +1213,8 @@ typedef struct hip_api_data_s {
|
||||
struct {
|
||||
int* device;
|
||||
int device__val;
|
||||
const hipDeviceProp_t* prop;
|
||||
hipDeviceProp_t prop__val;
|
||||
const hipDeviceProp_tR0600* prop;
|
||||
hipDeviceProp_tR0600 prop__val;
|
||||
} hipChooseDevice;
|
||||
struct {
|
||||
dim3 gridDim;
|
||||
@@ -1628,10 +1628,10 @@ typedef struct hip_api_data_s {
|
||||
unsigned int flags__val;
|
||||
} hipGetDeviceFlags;
|
||||
struct {
|
||||
hipDeviceProp_t* props;
|
||||
hipDeviceProp_t props__val;
|
||||
hipDeviceProp_tR0600* props;
|
||||
hipDeviceProp_tR0600 props__val;
|
||||
hipDevice_t device;
|
||||
} hipGetDeviceProperties;
|
||||
} hipGetDevicePropertiesR0600;
|
||||
struct {
|
||||
hipArray_t* levelArray;
|
||||
hipArray_t levelArray__val;
|
||||
@@ -3325,10 +3325,10 @@ typedef struct hip_api_data_s {
|
||||
cb_data.args.hipArrayGetInfo.flags = (unsigned int*)flags; \
|
||||
cb_data.args.hipArrayGetInfo.array = (hipArray*)array; \
|
||||
};
|
||||
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
|
||||
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')]
|
||||
#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipChooseDevice.device = (int*)device; \
|
||||
cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_t*)properties; \
|
||||
cb_data.args.hipChooseDevice.prop = (const hipDeviceProp_tR0600*)properties; \
|
||||
};
|
||||
// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')]
|
||||
#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \
|
||||
@@ -3779,10 +3779,10 @@ typedef struct hip_api_data_s {
|
||||
#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \
|
||||
};
|
||||
// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
|
||||
#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \
|
||||
cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \
|
||||
// hipGetDeviceProperties[('hipDeviceProp_tR0600*', 'props'), ('hipDevice_t', 'device')]
|
||||
#define INIT_hipGetDevicePropertiesR0600_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipGetDevicePropertiesR0600.props = (hipDeviceProp_tR0600*)props; \
|
||||
cb_data.args.hipGetDevicePropertiesR0600.device = (hipDevice_t)device; \
|
||||
};
|
||||
// hipGetErrorString[]
|
||||
#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \
|
||||
@@ -5573,7 +5573,7 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = *(data->args.hipArrayGetInfo.flags);
|
||||
if (data->args.hipArrayGetInfo.array) data->args.hipArrayGetInfo.array__val = *(data->args.hipArrayGetInfo.array);
|
||||
break;
|
||||
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')]
|
||||
// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')]
|
||||
case HIP_API_ID_hipChooseDevice:
|
||||
if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device);
|
||||
if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop);
|
||||
@@ -5908,9 +5908,9 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
case HIP_API_ID_hipGetDeviceFlags:
|
||||
if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags);
|
||||
break;
|
||||
// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')]
|
||||
case HIP_API_ID_hipGetDeviceProperties:
|
||||
if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props);
|
||||
// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'props'), ('hipDevice_t', 'device')]
|
||||
case HIP_API_ID_hipGetDevicePropertiesR0600:
|
||||
if (data->args.hipGetDevicePropertiesR0600.props) data->args.hipGetDevicePropertiesR0600.props__val = *(data->args.hipGetDevicePropertiesR0600.props);
|
||||
break;
|
||||
// hipGetErrorString[]
|
||||
case HIP_API_ID_hipGetErrorString:
|
||||
@@ -7626,11 +7626,11 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceFlags.flags__val); }
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipGetDeviceProperties:
|
||||
oss << "hipGetDeviceProperties(";
|
||||
if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL";
|
||||
else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.props__val); }
|
||||
oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.device);
|
||||
case HIP_API_ID_hipGetDevicePropertiesR0600:
|
||||
oss << "hipGetDevicePropertiesR0600(";
|
||||
if (data->args.hipGetDevicePropertiesR0600.props == NULL) oss << "props=NULL";
|
||||
else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.props__val); }
|
||||
oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.device);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipGetErrorString:
|
||||
|
||||
@@ -32,11 +32,11 @@ THE SOFTWARE.
|
||||
typedef int hipLaunchParm;
|
||||
|
||||
#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \
|
||||
do { \
|
||||
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
|
||||
} while (0)
|
||||
do { \
|
||||
kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
|
||||
#define hipLaunchKernelGGL(kernelName, ...) hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
|
||||
|
||||
#define hipReadModeElementType cudaReadModeElementType
|
||||
|
||||
@@ -105,15 +105,15 @@ typedef int hipLaunchParm;
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
#define abort_() \
|
||||
{ asm("trap;"); }
|
||||
#define abort_() \
|
||||
{ asm("trap;"); }
|
||||
#undef assert
|
||||
#define assert(COND) \
|
||||
{ \
|
||||
if (!COND) { \
|
||||
abort_(); \
|
||||
} \
|
||||
}
|
||||
{ \
|
||||
if (!COND) { \
|
||||
abort_(); \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define __clock() clock()
|
||||
|
||||
@@ -1980,87 +1980,143 @@ inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int valu
|
||||
}
|
||||
|
||||
inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
|
||||
|
||||
if (p_prop == NULL) {
|
||||
return hipErrorInvalidValue;
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
struct cudaDeviceProp cdprop;
|
||||
cudaError_t cerror;
|
||||
cerror = cudaGetDeviceProperties(&cdprop, device);
|
||||
hipError_t error = hipCUDAErrorTohipError(cudaGetDeviceProperties(&cdprop, device));
|
||||
|
||||
if (error != hipSuccess) {
|
||||
return error;
|
||||
}
|
||||
|
||||
strncpy(p_prop->name, cdprop.name, 256);
|
||||
strncpy(p_prop->uuid.bytes, cdprop.uuid.bytes, 16);
|
||||
strncpy(p_prop->luid, cdprop.luid, 8);
|
||||
p_prop->luidDeviceNodeMask = cdprop.luidDeviceNodeMask;
|
||||
p_prop->totalGlobalMem = cdprop.totalGlobalMem;
|
||||
p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock;
|
||||
p_prop->regsPerBlock = cdprop.regsPerBlock;
|
||||
p_prop->warpSize = cdprop.warpSize;
|
||||
p_prop->memPitch = cdprop.memPitch;
|
||||
p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i];
|
||||
p_prop->maxGridSize[i] = cdprop.maxGridSize[i];
|
||||
}
|
||||
p_prop->maxThreadsDim[0] = cdprop.maxThreadsDim[0];
|
||||
p_prop->maxThreadsDim[1] = cdprop.maxThreadsDim[1];
|
||||
p_prop->maxThreadsDim[2] = cdprop.maxThreadsDim[2];
|
||||
p_prop->maxGridSize[0] = cdprop.maxGridSize[0];
|
||||
p_prop->maxGridSize[1] = cdprop.maxGridSize[1];
|
||||
p_prop->maxGridSize[2] = cdprop.maxGridSize[2];
|
||||
p_prop->clockRate = cdprop.clockRate;
|
||||
p_prop->memoryClockRate = cdprop.memoryClockRate;
|
||||
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
|
||||
p_prop->totalConstMem = cdprop.totalConstMem;
|
||||
p_prop->major = cdprop.major;
|
||||
p_prop->minor = cdprop.minor;
|
||||
p_prop->textureAlignment = cdprop.textureAlignment;
|
||||
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
|
||||
p_prop->deviceOverlap = cdprop.deviceOverlap;
|
||||
p_prop->multiProcessorCount = cdprop.multiProcessorCount;
|
||||
p_prop->l2CacheSize = cdprop.l2CacheSize;
|
||||
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
|
||||
p_prop->computeMode = cdprop.computeMode;
|
||||
p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate:
|
||||
|
||||
int ccVers = p_prop->major * 100 + p_prop->minor * 10;
|
||||
p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110);
|
||||
p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110);
|
||||
p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120);
|
||||
p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120);
|
||||
p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200);
|
||||
p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120);
|
||||
p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110);
|
||||
p_prop->arch.hasDoubles = (ccVers >= 130);
|
||||
p_prop->arch.hasWarpVote = (ccVers >= 120);
|
||||
p_prop->arch.hasWarpBallot = (ccVers >= 200);
|
||||
p_prop->arch.hasWarpShuffle = (ccVers >= 300);
|
||||
p_prop->arch.hasFunnelShift = (ccVers >= 350);
|
||||
p_prop->arch.hasThreadFenceSystem = (ccVers >= 200);
|
||||
p_prop->arch.hasSyncThreadsExt = (ccVers >= 200);
|
||||
p_prop->arch.hasSurfaceFuncs = (ccVers >= 200);
|
||||
p_prop->arch.has3dGrid = (ccVers >= 200);
|
||||
p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
|
||||
|
||||
p_prop->concurrentKernels = cdprop.concurrentKernels;
|
||||
p_prop->pciDomainID = cdprop.pciDomainID;
|
||||
p_prop->pciBusID = cdprop.pciBusID;
|
||||
p_prop->pciDeviceID = cdprop.pciDeviceID;
|
||||
p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor;
|
||||
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
|
||||
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
|
||||
p_prop->gcnArch = 0; // Not a GCN arch
|
||||
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
|
||||
p_prop->integrated = cdprop.integrated;
|
||||
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
|
||||
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
|
||||
p_prop->cooperativeMultiDeviceUnmatchedFunc = 0;
|
||||
p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0;
|
||||
p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0;
|
||||
p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0;
|
||||
|
||||
p_prop->maxTexture1D = cdprop.maxTexture1D;
|
||||
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
|
||||
p_prop->computeMode = cdprop.computeMode;
|
||||
p_prop->maxTexture1D = cdprop.maxTexture1D;
|
||||
p_prop->maxTexture1DMipmap = cdprop.maxTexture1DMipmap;
|
||||
p_prop->maxTexture1DLinear = cdprop.maxTexture1DLinear;
|
||||
p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0];
|
||||
p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1];
|
||||
p_prop->maxTexture2DMipmap[0] = cdprop.maxTexture2DMipmap[0];
|
||||
p_prop->maxTexture2DMipmap[1] = cdprop.maxTexture2DMipmap[1];
|
||||
p_prop->maxTexture2DLinear[0] = cdprop.maxTexture2DLinear[0];
|
||||
p_prop->maxTexture2DLinear[1] = cdprop.maxTexture2DLinear[1];
|
||||
p_prop->maxTexture2DLinear[2] = cdprop.maxTexture2DLinear[2];
|
||||
p_prop->maxTexture2DGather[0] = cdprop.maxTexture2DGather[0];
|
||||
p_prop->maxTexture2DGather[1] = cdprop.maxTexture2DGather[1];
|
||||
p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0];
|
||||
p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1];
|
||||
p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2];
|
||||
p_prop->maxTexture3DAlt[0] = cdprop.maxTexture3DAlt[0];
|
||||
p_prop->maxTexture3DAlt[1] = cdprop.maxTexture3DAlt[1];
|
||||
p_prop->maxTexture3DAlt[2] = cdprop.maxTexture3DAlt[2];
|
||||
p_prop->maxTextureCubemap = cdprop.maxTextureCubemap;
|
||||
p_prop->maxTexture1DLayered[0] = cdprop.maxTexture1DLayered[0];
|
||||
p_prop->maxTexture1DLayered[1] = cdprop.maxTexture1DLayered[1];
|
||||
p_prop->maxTexture2DLayered[0] = cdprop.maxTexture2DLayered[0];
|
||||
p_prop->maxTexture2DLayered[1] = cdprop.maxTexture2DLayered[1];
|
||||
p_prop->maxTexture2DLayered[2] = cdprop.maxTexture2DLayered[2];
|
||||
p_prop->maxTextureCubemapLayered[0] = cdprop.maxTextureCubemapLayered[0];
|
||||
p_prop->maxTextureCubemapLayered[1] = cdprop.maxTextureCubemapLayered[1];
|
||||
p_prop->maxSurface1D = cdprop.maxSurface1D;
|
||||
p_prop->maxSurface2D[0] = cdprop.maxSurface2D[0];
|
||||
p_prop->maxSurface2D[1] = cdprop.maxSurface2D[1];
|
||||
p_prop->maxSurface3D[0] = cdprop.maxSurface3D[0];
|
||||
p_prop->maxSurface3D[1] = cdprop.maxSurface3D[1];
|
||||
p_prop->maxSurface3D[2] = cdprop.maxSurface3D[2];
|
||||
p_prop->maxSurface1DLayered[0] = cdprop.maxSurface1DLayered[0];
|
||||
p_prop->maxSurface1DLayered[1] = cdprop.maxSurface1DLayered[1];
|
||||
p_prop->maxSurface2DLayered[0] = cdprop.maxSurface2DLayered[0];
|
||||
p_prop->maxSurface2DLayered[1] = cdprop.maxSurface2DLayered[1];
|
||||
p_prop->maxSurface2DLayered[2] = cdprop.maxSurface2DLayered[2];
|
||||
p_prop->maxSurfaceCubemap = cdprop.maxSurfaceCubemap;
|
||||
p_prop->maxSurfaceCubemapLayered[0] = cdprop.maxSurfaceCubemapLayered[0];
|
||||
p_prop->maxSurfaceCubemapLayered[1] = cdprop.maxSurfaceCubemapLayered[1];
|
||||
p_prop->surfaceAlignment = cdprop.surfaceAlignment;
|
||||
p_prop->concurrentKernels = cdprop.concurrentKernels;
|
||||
p_prop->ECCEnabled = cdprop.ECCEnabled;
|
||||
p_prop->pciBusID = cdprop.pciBusID;
|
||||
p_prop->pciDeviceID = cdprop.pciDeviceID;
|
||||
p_prop->pciDomainID = cdprop.pciDomainID;
|
||||
p_prop->tccDriver = cdprop.tccDriver;
|
||||
p_prop->asyncEngineCount = cdprop.asyncEngineCount;
|
||||
p_prop->unifiedAddressing = cdprop.unifiedAddressing;
|
||||
p_prop->memoryClockRate = cdprop.memoryClockRate;
|
||||
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
|
||||
p_prop->l2CacheSize = cdprop.l2CacheSize;
|
||||
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor;
|
||||
p_prop->streamPrioritiesSupported = cdprop.streamPrioritiesSupported;
|
||||
p_prop->globalL1CacheSupported = cdprop.globalL1CacheSupported;
|
||||
p_prop->localL1CacheSupported = cdprop.localL1CacheSupported;
|
||||
p_prop->sharedMemPerMultiprocessor = cdprop.sharedMemPerMultiprocessor;
|
||||
p_prop->regsPerMultiprocessor = cdprop.regsPerMultiprocessor;
|
||||
p_prop->managedMemory = cdprop.managedMemory;
|
||||
p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard;
|
||||
p_prop->multiGpuBoardGroupID = cdprop.multiGpuBoardGroupID;
|
||||
p_prop->hostNativeAtomicSupported = cdprop.hostNativeAtomicSupported;
|
||||
p_prop->singleToDoublePrecisionPerfRatio = cdprop.singleToDoublePrecisionPerfRatio;
|
||||
p_prop->pageableMemoryAccess = cdprop.pageableMemoryAccess;
|
||||
p_prop->concurrentManagedAccess = cdprop.concurrentManagedAccess;
|
||||
p_prop->computePreemptionSupported = cdprop.computePreemptionSupported;
|
||||
p_prop->canUseHostPointerForRegisteredMem = cdprop.canUseHostPointerForRegisteredMem;
|
||||
p_prop->cooperativeLaunch = cdprop.cooperativeLaunch;
|
||||
p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch;
|
||||
p_prop->sharedMemPerBlockOptin = cdprop.sharedMemPerBlockOptin;
|
||||
p_prop->pageableMemoryAccessUsesHostPageTables = cdprop.pageableMemoryAccessUsesHostPageTables;
|
||||
p_prop->directManagedMemAccessFromHost = cdprop.directManagedMemAccessFromHost;
|
||||
|
||||
p_prop->memPitch = cdprop.memPitch;
|
||||
p_prop->textureAlignment = cdprop.textureAlignment;
|
||||
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
|
||||
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
|
||||
p_prop->ECCEnabled = cdprop.ECCEnabled;
|
||||
p_prop->tccDriver = cdprop.tccDriver;
|
||||
|
||||
return hipCUDAErrorTohipError(cerror);
|
||||
#if CUDA_VERSION >= 11010
|
||||
p_prop->accessPolicyMaxWindowSize = cdprop.accessPolicyMaxWindowSize;
|
||||
p_prop->maxBlocksPerMultiProcessor = cdprop.maxBlocksPerMultiProcessor;
|
||||
p_prop->persistingL2CacheMaxSize = cdprop.persistingL2CacheMaxSize;
|
||||
p_prop->reservedSharedMemPerBlock = cdprop.reservedSharedMemPerBlock;
|
||||
p_prop->warpSize = cdprop.warpSize;
|
||||
#endif
|
||||
|
||||
#if CUDA_VERSION >= 12000
|
||||
p_prop->clusterLaunch = cdprop.clusterLaunch;
|
||||
p_prop->deferredMappingHipArraySupported = cdprop.deferredMappingCudaArraySupported;
|
||||
p_prop->gpuDirectRDMAFlushWritesOptions = cdprop.gpuDirectRDMAFlushWritesOptions;
|
||||
p_prop->gpuDirectRDMASupported = cdprop.gpuDirectRDMASupported;
|
||||
p_prop->gpuDirectRDMAWritesOrdering = cdprop.gpuDirectRDMAWritesOrdering;
|
||||
p_prop->hostRegisterReadOnlySupported = cdprop.hostRegisterReadOnlySupported;
|
||||
p_prop->hostRegisterSupported = cdprop.hostRegisterSupported;
|
||||
p_prop->ipcEventSupported = cdprop.ipcEventSupported;
|
||||
p_prop->memoryPoolSupportedHandleTypes = cdprop.memoryPoolSupportedHandleTypes;
|
||||
p_prop->memoryPoolsSupported = cdprop.memoryPoolsSupported;
|
||||
p_prop->sparseHipArraySupported = cdprop.sparseCudaArraySupported;
|
||||
p_prop->timelineSemaphoreInteropSupported = cdprop.timelineSemaphoreInteropSupported;
|
||||
p_prop->unifiedFunctionPointers = cdprop.unifiedFunctionPointers;
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
|
||||
@@ -2188,6 +2244,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
|
||||
cdattr = cudaDevAttrCooperativeMultiDeviceLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeHostRegisterSupported:
|
||||
cdattr = cudaDevAttrHostRegisterSupported;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentManagedAccess:
|
||||
cdattr = cudaDevAttrConcurrentManagedAccess;
|
||||
break;
|
||||
|
||||
@@ -63,6 +63,7 @@ hipFuncSetSharedMemConfig
|
||||
hipGetDevice
|
||||
hipGetDeviceCount
|
||||
hipGetDeviceProperties
|
||||
hipGetDevicePropertiesR0600
|
||||
hipGetErrorName
|
||||
hipGetErrorString
|
||||
hipGetLastError
|
||||
|
||||
@@ -23,6 +23,9 @@
|
||||
#include "hip_internal.hpp"
|
||||
#include "hip_mempool_impl.hpp"
|
||||
|
||||
#undef hipGetDeviceProperties
|
||||
#undef hipDeviceProp_t
|
||||
|
||||
namespace hip {
|
||||
|
||||
// ================================================================================================
|
||||
@@ -139,12 +142,12 @@ Device::~Device() {
|
||||
graph_mem_pool_->release();
|
||||
}
|
||||
|
||||
if (null_stream_!= nullptr) {
|
||||
if (null_stream_ != nullptr) {
|
||||
hip::Stream::Destroy(null_stream_);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace hip
|
||||
|
||||
void ihipDestroyDevice() {
|
||||
for (auto deviceHandle : g_devices) {
|
||||
@@ -171,8 +174,7 @@ hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) {
|
||||
HIP_RETURN(ihipDeviceGet(device, deviceId));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
|
||||
|
||||
hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
|
||||
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
@@ -191,8 +193,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
|
||||
|
||||
hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
|
||||
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
@@ -232,8 +233,7 @@ hipError_t ihipDeviceGetCount(int* count) {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) {
|
||||
|
||||
hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
|
||||
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
@@ -275,7 +275,7 @@ hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, hipDevice_t device) {
|
||||
if (props == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
@@ -286,7 +286,278 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
|
||||
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
|
||||
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max())+1;
|
||||
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
|
||||
hipDeviceProp_tR0600 deviceProps = {0};
|
||||
|
||||
const auto& info = deviceHandle->info();
|
||||
const auto& isa = deviceHandle->isa();
|
||||
::strncpy(deviceProps.name, info.boardName_, sizeof(info.boardName_));
|
||||
::strncpy(deviceProps.uuid.bytes, info.uuid_, sizeof(info.uuid_));
|
||||
deviceProps.totalGlobalMem = info.globalMemSize_;
|
||||
deviceProps.sharedMemPerBlock = info.localMemSizePerCU_;
|
||||
deviceProps.sharedMemPerMultiprocessor = info.localMemSizePerCU_ * info.numRTCUs_;
|
||||
deviceProps.regsPerBlock = info.availableRegistersPerCU_;
|
||||
deviceProps.warpSize = info.wavefrontWidth_;
|
||||
deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_;
|
||||
deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0];
|
||||
deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1];
|
||||
deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2];
|
||||
deviceProps.maxGridSize[0] = int32_max;
|
||||
deviceProps.maxGridSize[1] = uint16_max;
|
||||
deviceProps.maxGridSize[2] = uint16_max;
|
||||
deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000;
|
||||
deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000;
|
||||
deviceProps.memoryBusWidth = info.globalMemChannels_;
|
||||
deviceProps.totalConstMem = std::min(info.maxConstantBufferSize_, int32_max);
|
||||
deviceProps.major = isa.versionMajor();
|
||||
deviceProps.minor = isa.versionMinor();
|
||||
deviceProps.multiProcessorCount = info.maxComputeUnits_;
|
||||
deviceProps.l2CacheSize = info.l2CacheSize_;
|
||||
deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_;
|
||||
deviceProps.maxBlocksPerMultiProcessor = int(info.maxThreadsPerCU_ / info.maxWorkGroupSize_);
|
||||
deviceProps.computeMode = 0;
|
||||
deviceProps.clockInstructionRate = info.timeStampFrequency_;
|
||||
deviceProps.arch.hasGlobalInt32Atomics = 1;
|
||||
deviceProps.arch.hasGlobalFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasSharedInt32Atomics = 1;
|
||||
deviceProps.arch.hasSharedFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasFloatAtomicAdd = 1;
|
||||
deviceProps.arch.hasGlobalInt64Atomics = 1;
|
||||
deviceProps.arch.hasSharedInt64Atomics = 1;
|
||||
deviceProps.hostNativeAtomicSupported = info.pcie_atomics_ ? 1 : 0;
|
||||
deviceProps.arch.hasDoubles = 1;
|
||||
deviceProps.arch.hasWarpVote = 1;
|
||||
deviceProps.arch.hasWarpBallot = 1;
|
||||
deviceProps.arch.hasWarpShuffle = 1;
|
||||
deviceProps.arch.hasFunnelShift = 0;
|
||||
deviceProps.arch.hasThreadFenceSystem = 1;
|
||||
deviceProps.arch.hasSyncThreadsExt = 0;
|
||||
deviceProps.arch.hasSurfaceFuncs = 0;
|
||||
deviceProps.arch.has3dGrid = 1;
|
||||
deviceProps.arch.hasDynamicParallelism = 0;
|
||||
deviceProps.concurrentKernels = 1;
|
||||
deviceProps.pciDomainID = info.pciDomainID;
|
||||
deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
|
||||
deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
|
||||
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
|
||||
deviceProps.canMapHostMemory = 1;
|
||||
deviceProps.regsPerMultiprocessor = info.availableRegistersPerCU_;
|
||||
sprintf(deviceProps.gcnArchName, "%s", isa.targetId());
|
||||
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
|
||||
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.maxTexture1DLinear =
|
||||
std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
|
||||
deviceProps.maxTexture1DMipmap = std::min(16 * info.imageMaxBufferSize_, int32_max);
|
||||
deviceProps.maxTexture1D = deviceProps.maxSurface1D = std::min(info.image1DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture2D[0] = deviceProps.maxSurface2D[0] =
|
||||
std::min(info.image2DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture2D[1] = deviceProps.maxSurface2D[1] =
|
||||
std::min(info.image2DMaxHeight_, int32_max);
|
||||
deviceProps.maxTexture3D[0] = deviceProps.maxSurface3D[0] =
|
||||
std::min(info.image3DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture3D[1] = deviceProps.maxSurface3D[1] =
|
||||
std::min(info.image3DMaxHeight_, int32_max);
|
||||
deviceProps.maxTexture3D[2] = deviceProps.maxSurface3D[2] =
|
||||
std::min(info.image3DMaxDepth_, int32_max);
|
||||
deviceProps.maxTexture1DLayered[0] = deviceProps.maxSurface1DLayered[0] =
|
||||
std::min(info.image1DAMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture1DLayered[1] = deviceProps.maxSurface1DLayered[1] =
|
||||
std::min(info.imageMaxArraySize_, int32_max);
|
||||
deviceProps.maxTexture2DLayered[0] = deviceProps.maxSurface2DLayered[0] =
|
||||
std::min(info.image2DAMaxWidth_[0], int32_max);
|
||||
deviceProps.maxTexture2DLayered[1] = deviceProps.maxSurface2DLayered[1] =
|
||||
std::min(info.image2DAMaxWidth_[1], int32_max);
|
||||
deviceProps.maxTexture2DLayered[2] = deviceProps.maxSurface2DLayered[2] =
|
||||
std::min(info.imageMaxArraySize_, int32_max);
|
||||
deviceProps.hdpMemFlushCntl = info.hdpMemFlushCntl;
|
||||
deviceProps.hdpRegFlushCntl = info.hdpRegFlushCntl;
|
||||
|
||||
deviceProps.memPitch = std::min(info.maxMemAllocSize_, int32_max);
|
||||
deviceProps.textureAlignment = deviceProps.surfaceAlignment = info.imageBaseAddressAlignment_;
|
||||
deviceProps.texturePitchAlignment = info.imagePitchAlignment_;
|
||||
deviceProps.kernelExecTimeoutEnabled = 0;
|
||||
deviceProps.ECCEnabled = info.errorCorrectionSupport_ ? 1 : 0;
|
||||
deviceProps.isLargeBar = info.largeBar_ ? 1 : 0;
|
||||
deviceProps.asicRevision = info.asicRevision_;
|
||||
deviceProps.ipcEventSupported = 1;
|
||||
deviceProps.streamPrioritiesSupported = 1;
|
||||
deviceProps.multiGpuBoardGroupID = info.deviceTopology_.pcie.device;
|
||||
|
||||
// HMM capabilities
|
||||
deviceProps.asyncEngineCount = info.numAsyncQueues_;
|
||||
deviceProps.deviceOverlap = (info.numAsyncQueues_ > 0) ? 1 : 0;
|
||||
deviceProps.unifiedAddressing = info.hmmDirectHostAccess_;
|
||||
deviceProps.managedMemory = info.hmmSupported_;
|
||||
deviceProps.concurrentManagedAccess = info.hmmSupported_;
|
||||
deviceProps.directManagedMemAccessFromHost = info.hmmDirectHostAccess_;
|
||||
deviceProps.canUseHostPointerForRegisteredMem = info.hostUnifiedMemory_;
|
||||
deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_;
|
||||
deviceProps.hostRegisterSupported = info.hostUnifiedMemory_;
|
||||
deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_;
|
||||
|
||||
// Mem pool
|
||||
deviceProps.memoryPoolsSupported = HIP_MEM_POOL_SUPPORT;
|
||||
deviceProps.memoryPoolSupportedHandleTypes = 0;
|
||||
|
||||
// Caching behavior
|
||||
deviceProps.globalL1CacheSupported = 1;
|
||||
deviceProps.localL1CacheSupported = 1;
|
||||
deviceProps.persistingL2CacheMaxSize = info.l2CacheSize_;
|
||||
deviceProps.reservedSharedMemPerBlock = 0;
|
||||
deviceProps.sharedMemPerBlockOptin = 0;
|
||||
|
||||
// Unsupported features
|
||||
// Single to double precision perf ratio
|
||||
deviceProps.singleToDoublePrecisionPerfRatio = 0;
|
||||
// Flag hipHostRegisterReadOnly
|
||||
deviceProps.hostRegisterReadOnlySupported = 0;
|
||||
// Compute preemption
|
||||
deviceProps.computePreemptionSupported = 0;
|
||||
// Cubemaps
|
||||
deviceProps.maxTextureCubemap = 0;
|
||||
deviceProps.maxTextureCubemapLayered[0] = 0;
|
||||
deviceProps.maxTextureCubemapLayered[1] = 0;
|
||||
deviceProps.maxSurfaceCubemap = 0;
|
||||
deviceProps.maxSurfaceCubemapLayered[0] = 0;
|
||||
deviceProps.maxSurfaceCubemapLayered[1] = 0;
|
||||
// Texture gather ops
|
||||
deviceProps.maxTexture2DGather[0] = 0;
|
||||
deviceProps.maxTexture2DGather[1] = 0;
|
||||
// Textures bound to pitch memory
|
||||
deviceProps.maxTexture2DLinear[0] = 0;
|
||||
deviceProps.maxTexture2DLinear[1] = 0;
|
||||
deviceProps.maxTexture2DLinear[2] = 0;
|
||||
// Alternate 3D texture
|
||||
deviceProps.maxTexture3DAlt[0] = 0;
|
||||
deviceProps.maxTexture3DAlt[1] = 0;
|
||||
deviceProps.maxTexture3DAlt[2] = 0;
|
||||
// access policy
|
||||
deviceProps.accessPolicyMaxWindowSize = 0;
|
||||
// cluster launch
|
||||
deviceProps.clusterLaunch = 0;
|
||||
// Mapping HIP array
|
||||
deviceProps.deferredMappingHipArraySupported = 0;
|
||||
// RDMA options
|
||||
deviceProps.gpuDirectRDMASupported = 0;
|
||||
deviceProps.gpuDirectRDMAFlushWritesOptions = 0;
|
||||
deviceProps.gpuDirectRDMAWritesOrdering = 0;
|
||||
// luid - TODO populate it only on windows
|
||||
std::memset(deviceProps.luid, 0, sizeof(deviceProps.luid));
|
||||
deviceProps.luidDeviceNodeMask = 0;
|
||||
deviceProps.sparseHipArraySupported = 0;
|
||||
deviceProps.timelineSemaphoreInteropSupported = 0;
|
||||
deviceProps.unifiedFunctionPointers = 0;
|
||||
|
||||
*props = deviceProps;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGetDevicePropertiesR0600(hipDeviceProp_tR0600* props, hipDevice_t device) {
|
||||
HIP_INIT_API(hipGetDevicePropertiesR0600, props, device);
|
||||
|
||||
HIP_RETURN(ihipGetDeviceProperties(props, device));
|
||||
}
|
||||
|
||||
extern "C" typedef struct hipDeviceProp_t {
|
||||
char name[256]; ///< Device name.
|
||||
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
||||
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
|
||||
int regsPerBlock; ///< Registers per block.
|
||||
int warpSize; ///< Warp size.
|
||||
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
||||
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
||||
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
||||
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
||||
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
||||
int memoryBusWidth; ///< Global memory bus width in bits.
|
||||
size_t totalConstMem; ///< Size of shared memory region (in bytes).
|
||||
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||
///< feature caps.
|
||||
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
|
||||
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||
///< feature caps.
|
||||
int multiProcessorCount; ///< Number of multi-processors (compute units).
|
||||
int l2CacheSize; ///< L2 cache size.
|
||||
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
|
||||
int computeMode; ///< Compute mode.
|
||||
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
|
||||
///< instructions. New for HIP.
|
||||
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
||||
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
||||
int pciDomainID; ///< PCI Domain ID
|
||||
int pciBusID; ///< PCI Bus ID.
|
||||
int pciDeviceID; ///< PCI Device ID.
|
||||
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
|
||||
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
|
||||
int canMapHostMemory; ///< Check whether HIP can map host memory
|
||||
int gcnArch; ///< DEPRECATED: use gcnArchName instead
|
||||
char gcnArchName[256]; ///< AMD GCN Arch Name.
|
||||
int integrated; ///< APU vs dGPU
|
||||
int cooperativeLaunch; ///< HIP device supports cooperative launch
|
||||
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple
|
||||
///< devices
|
||||
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
|
||||
int maxTexture1D; ///< Maximum number of elements in 1D images
|
||||
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
|
||||
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image
|
||||
///< elements
|
||||
unsigned int* hdpMemFlushCntl; ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||
unsigned int* hdpRegFlushCntl; ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
|
||||
size_t textureAlignment; ///< Alignment requirement for textures
|
||||
size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to
|
||||
///< pitched memory
|
||||
int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device
|
||||
int ECCEnabled; ///< Device has ECC support enabled
|
||||
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
||||
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
/// devices with unmatched functions
|
||||
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
/// devices with unmatched grid dimensions
|
||||
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
/// devices with unmatched block dimensions
|
||||
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on
|
||||
///< multiple
|
||||
/// devices with unmatched shared memories
|
||||
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
|
||||
int asicRevision; ///< Revision of the GPU in this device
|
||||
int managedMemory; ///< Device supports allocating managed memory on this system
|
||||
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device
|
||||
///< without migration
|
||||
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with
|
||||
///< the CPU
|
||||
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
|
||||
///< without calling hipHostRegister on it
|
||||
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's
|
||||
///< page tables
|
||||
} hipDeviceProp_t;
|
||||
|
||||
extern "C" hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
// Removing this API from tracing.
|
||||
// This API is now in backwards compatibility mode and is not callable from newly compiled apps.
|
||||
HIP_INIT_VOID();
|
||||
|
||||
if (props == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
if (unsigned(device) >= g_devices.size()) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
|
||||
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
|
||||
constexpr auto uint16_max = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
|
||||
hipDeviceProp_t deviceProps = {0};
|
||||
|
||||
const auto& info = deviceHandle->info();
|
||||
@@ -348,8 +619,9 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.maxTexture1DLinear = std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
|
||||
deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture1DLinear =
|
||||
std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes
|
||||
deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture2D[0] = std::min(info.image2DMaxWidth_, int32_max);
|
||||
deviceProps.maxTexture2D[1] = std::min(info.image2DMaxHeight_, int32_max);
|
||||
deviceProps.maxTexture3D[0] = std::min(info.image3DMaxWidth_, int32_max);
|
||||
@@ -376,9 +648,3 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
*props = deviceProps;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
|
||||
HIP_INIT_API(hipGetDeviceProperties, props, device);
|
||||
|
||||
HIP_RETURN(ihipGetDeviceProperties(props, device));
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
|
||||
HIP_INIT_API(hipChooseDevice, device, properties);
|
||||
|
||||
if (device == nullptr || properties == nullptr) {
|
||||
@@ -35,99 +34,97 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
int count = 0;
|
||||
HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count));
|
||||
|
||||
for (cl_int i = 0; i< count; ++i) {
|
||||
for (cl_int i = 0; i < count; ++i) {
|
||||
hipDeviceProp_t currentProp = {0};
|
||||
cl_uint validPropCount = 0;
|
||||
cl_uint matchedCount = 0;
|
||||
hipError_t err = ihipGetDeviceProperties(¤tProp, i);
|
||||
if (properties->major != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.major >= properties->major) {
|
||||
if (currentProp.major >= properties->major) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if (properties->minor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.minor >= properties->minor) {
|
||||
if (currentProp.minor >= properties->minor) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->totalGlobalMem != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.totalGlobalMem >= properties->totalGlobalMem) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->totalGlobalMem != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.totalGlobalMem >= properties->totalGlobalMem) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->sharedMemPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->sharedMemPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxThreadsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->maxThreadsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->totalConstMem != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.totalConstMem >= properties->totalConstMem ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->totalConstMem != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.totalConstMem >= properties->totalConstMem) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->multiProcessorCount != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.multiProcessorCount >=
|
||||
properties->multiProcessorCount ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->multiProcessorCount != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.multiProcessorCount >= properties->multiProcessorCount) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxThreadsPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxThreadsPerMultiProcessor >=
|
||||
properties->maxThreadsPerMultiProcessor ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->maxThreadsPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.maxThreadsPerMultiProcessor >= properties->maxThreadsPerMultiProcessor) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->memoryClockRate != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.memoryClockRate >= properties->memoryClockRate ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->memoryClockRate != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.memoryClockRate >= properties->memoryClockRate) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->memoryBusWidth != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->memoryBusWidth != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.memoryBusWidth >= properties->memoryBusWidth) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->l2CacheSize != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.l2CacheSize >= properties->l2CacheSize ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->l2CacheSize != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.l2CacheSize >= properties->l2CacheSize) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->regsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.regsPerBlock >= properties->regsPerBlock ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->regsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.regsPerBlock >= properties->regsPerBlock) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxSharedMemoryPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxSharedMemoryPerMultiProcessor >=
|
||||
properties->maxSharedMemoryPerMultiProcessor ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->maxSharedMemoryPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.maxSharedMemoryPerMultiProcessor >=
|
||||
properties->maxSharedMemoryPerMultiProcessor) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->warpSize != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.warpSize >= properties->warpSize ) {
|
||||
matchedCount++;
|
||||
}
|
||||
if (properties->warpSize != 0) {
|
||||
validPropCount++;
|
||||
if (currentProp.warpSize >= properties->warpSize) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(validPropCount == matchedCount) {
|
||||
if (validPropCount == matchedCount) {
|
||||
*device = matchedCount > maxMatchedCount ? i : *device;
|
||||
maxMatchedCount = std::max(matchedCount, maxMatchedCount);
|
||||
}
|
||||
@@ -137,7 +134,6 @@ hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
|
||||
|
||||
if (pi == nullptr) {
|
||||
@@ -151,214 +147,282 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
//FIXME: should we cache the props, or just select from deviceHandle->info_?
|
||||
// FIXME: should we cache the props, or just select from deviceHandle->info_?
|
||||
hipDeviceProp_t prop = {0};
|
||||
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device));
|
||||
|
||||
constexpr auto int32_max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
|
||||
|
||||
switch (attr) {
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
*pi = prop.maxThreadsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimX:
|
||||
*pi = prop.maxThreadsDim[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimY:
|
||||
*pi = prop.maxThreadsDim[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimZ:
|
||||
*pi = prop.maxThreadsDim[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimX:
|
||||
*pi = prop.maxGridSize[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimY:
|
||||
*pi = prop.maxGridSize[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimZ:
|
||||
*pi = prop.maxGridSize[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerBlock:
|
||||
*pi = prop.sharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeTotalConstantMemory:
|
||||
// size_t to int casting
|
||||
*pi = std::min(prop.totalConstMem, int32_max);
|
||||
break;
|
||||
case hipDeviceAttributeWarpSize:
|
||||
*pi = prop.warpSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerBlock:
|
||||
*pi = prop.regsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop.clockRate;
|
||||
break;
|
||||
case hipDeviceAttributeWallClockRate:
|
||||
*pi = g_devices[device]->devices()[0]->info().wallClockFrequency_;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop.memoryClockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop.memoryBusWidth;
|
||||
break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop.multiProcessorCount;
|
||||
break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
*pi = prop.computeMode;
|
||||
break;
|
||||
case hipDeviceAttributeL2CacheSize:
|
||||
*pi = prop.l2CacheSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
|
||||
*pi = prop.maxThreadsPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
*pi = prop.major;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMinor:
|
||||
*pi = prop.minor;
|
||||
break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop.pciBusID;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop.concurrentKernels;
|
||||
break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop.pciDeviceID;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
|
||||
*pi = prop.maxSharedMemoryPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeIsMultiGpuBoard:
|
||||
*pi = prop.isMultiGpuBoard;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeLaunch:
|
||||
*pi = prop.cooperativeLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
|
||||
*pi = prop.cooperativeMultiDeviceLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeIntegrated:
|
||||
*pi = prop.integrated;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DWidth:
|
||||
*pi = prop.maxTexture1D;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DWidth:
|
||||
*pi = prop.maxTexture2D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DHeight:
|
||||
*pi = prop.maxTexture2D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DWidth:
|
||||
*pi = prop.maxTexture3D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DHeight:
|
||||
*pi = prop.maxTexture3D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DDepth:
|
||||
*pi = prop.maxTexture3D[2];
|
||||
break;
|
||||
case hipDeviceAttributeHdpMemFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeHdpRegFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeMaxPitch:
|
||||
// size_t to int casting
|
||||
*pi = std::min(prop.memPitch, int32_max);
|
||||
break;
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
*pi = prop.textureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTexturePitchAlignment:
|
||||
*pi = prop.texturePitchAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
*pi = prop.kernelExecTimeoutEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCanMapHostMemory:
|
||||
*pi = prop.canMapHostMemory;
|
||||
break;
|
||||
case hipDeviceAttributeEccEnabled:
|
||||
*pi = prop.ECCEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
|
||||
break;
|
||||
case hipDeviceAttributeAsicRevision:
|
||||
*pi = prop.asicRevision;
|
||||
break;
|
||||
case hipDeviceAttributeManagedMemory:
|
||||
*pi = prop.managedMemory;
|
||||
break;
|
||||
case hipDeviceAttributeDirectManagedMemAccessFromHost:
|
||||
*pi = prop.directManagedMemAccessFromHost;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentManagedAccess:
|
||||
*pi = prop.concurrentManagedAccess;
|
||||
break;
|
||||
case hipDeviceAttributePageableMemoryAccess:
|
||||
*pi = prop.pageableMemoryAccess;
|
||||
break;
|
||||
case hipDeviceAttributePageableMemoryAccessUsesHostPageTables:
|
||||
*pi = prop.pageableMemoryAccessUsesHostPageTables;
|
||||
break;
|
||||
case hipDeviceAttributeIsLargeBar:
|
||||
*pi = prop.isLargeBar;
|
||||
break;
|
||||
case hipDeviceAttributeUnifiedAddressing:
|
||||
// HIP runtime always uses SVM for host memory allocations.
|
||||
// Note: Host registered memory isn't covered by this feature
|
||||
// and still requires hipMemHostGetDevicePointer() call
|
||||
*pi = true;
|
||||
break;
|
||||
case hipDeviceAttributeCanUseStreamWaitValue:
|
||||
// hipStreamWaitValue64() and hipStreamWaitValue32() support
|
||||
*pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_;
|
||||
break;
|
||||
case hipDeviceAttributeImageSupport:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().imageSupport_);
|
||||
break;
|
||||
case hipDeviceAttributePhysicalMultiProcessorCount:
|
||||
*pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_;
|
||||
break;
|
||||
case hipDeviceAttributeFineGrainSupport:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->isFineGrainSupported());
|
||||
break;
|
||||
case hipDeviceAttributeMemoryPoolsSupported:
|
||||
*pi = HIP_MEM_POOL_SUPPORT;
|
||||
break;
|
||||
case hipDeviceAttributeVirtualMemoryManagementSupported:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().virtualMemoryManagement_);
|
||||
break;
|
||||
case hipDeviceAttributeHostRegisterSupported:
|
||||
*pi = true;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
*pi = prop.maxThreadsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeAsyncEngineCount:
|
||||
*pi = prop.asyncEngineCount;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimX:
|
||||
*pi = prop.maxThreadsDim[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimY:
|
||||
*pi = prop.maxThreadsDim[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimZ:
|
||||
*pi = prop.maxThreadsDim[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimX:
|
||||
*pi = prop.maxGridSize[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimY:
|
||||
*pi = prop.maxGridSize[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimZ:
|
||||
*pi = prop.maxGridSize[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxSurface1D:
|
||||
*pi = prop.maxSurface1D;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerBlock:
|
||||
*pi = prop.sharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeSharedMemPerBlockOptin:
|
||||
*pi = prop.sharedMemPerBlockOptin;
|
||||
break;
|
||||
case hipDeviceAttributeSharedMemPerMultiprocessor:
|
||||
*pi = prop.sharedMemPerMultiprocessor;
|
||||
break;
|
||||
case hipDeviceAttributeStreamPrioritiesSupported:
|
||||
*pi = prop.streamPrioritiesSupported;
|
||||
break;
|
||||
case hipDeviceAttributeSurfaceAlignment:
|
||||
*pi = prop.surfaceAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTotalConstantMemory:
|
||||
// size_t to int casting
|
||||
*pi = std::min(prop.totalConstMem, int32_max);
|
||||
break;
|
||||
case hipDeviceAttributeTotalGlobalMem:
|
||||
*pi = std::min(prop.totalGlobalMem, int32_max);
|
||||
break;
|
||||
case hipDeviceAttributeWarpSize:
|
||||
*pi = prop.warpSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerBlock:
|
||||
*pi = prop.regsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop.clockRate;
|
||||
break;
|
||||
case hipDeviceAttributeWallClockRate:
|
||||
*pi = g_devices[device]->devices()[0]->info().wallClockFrequency_;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop.memoryClockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop.memoryBusWidth;
|
||||
break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop.multiProcessorCount;
|
||||
break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
*pi = prop.computeMode;
|
||||
break;
|
||||
case hipDeviceAttributeComputePreemptionSupported:
|
||||
*pi = prop.computePreemptionSupported;
|
||||
break;
|
||||
case hipDeviceAttributeL2CacheSize:
|
||||
*pi = prop.l2CacheSize;
|
||||
break;
|
||||
case hipDeviceAttributeLocalL1CacheSupported:
|
||||
*pi = prop.localL1CacheSupported;
|
||||
break;
|
||||
case hipDeviceAttributeLuidDeviceNodeMask:
|
||||
*pi = prop.luidDeviceNodeMask;
|
||||
break;
|
||||
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
|
||||
*pi = prop.maxThreadsPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
*pi = prop.major;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMinor:
|
||||
*pi = prop.minor;
|
||||
break;
|
||||
case hipDeviceAttributeMultiGpuBoardGroupID:
|
||||
*pi = prop.multiGpuBoardGroupID;
|
||||
break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop.pciBusID;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop.concurrentKernels;
|
||||
break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop.pciDeviceID;
|
||||
break;
|
||||
case hipDeviceAttributePciDomainID:
|
||||
*pi = prop.pciDomainID;
|
||||
break;
|
||||
case hipDeviceAttributePersistingL2CacheMaxSize:
|
||||
*pi = prop.persistingL2CacheMaxSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerMultiprocessor:
|
||||
*pi = prop.regsPerMultiprocessor;
|
||||
break;
|
||||
case hipDeviceAttributeReservedSharedMemPerBlock:
|
||||
*pi = prop.reservedSharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
|
||||
*pi = prop.maxSharedMemoryPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeIsMultiGpuBoard:
|
||||
*pi = prop.isMultiGpuBoard;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeLaunch:
|
||||
*pi = prop.cooperativeLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeHostRegisterSupported:
|
||||
*pi = 1; // AMD GPUs allow you to register host memory regardless of the GPU
|
||||
break;
|
||||
case hipDeviceAttributeDeviceOverlap:
|
||||
*pi = prop.asyncEngineCount > 0 ? 1 : 0;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
|
||||
*pi = prop.cooperativeMultiDeviceLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeIntegrated:
|
||||
*pi = prop.integrated;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DWidth:
|
||||
*pi = prop.maxTexture1D;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DLinear:
|
||||
*pi = prop.maxTexture1DLinear;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DMipmap:
|
||||
*pi = prop.maxTexture1DMipmap;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTextureCubemap:
|
||||
*pi = prop.maxTextureCubemap;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DWidth:
|
||||
*pi = prop.maxTexture2D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DHeight:
|
||||
*pi = prop.maxTexture2D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DWidth:
|
||||
*pi = prop.maxTexture3D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DHeight:
|
||||
*pi = prop.maxTexture3D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DDepth:
|
||||
*pi = prop.maxTexture3D[2];
|
||||
break;
|
||||
case hipDeviceAttributeHdpMemFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeHdpRegFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeMaxPitch:
|
||||
// size_t to int casting
|
||||
*pi = std::min(prop.memPitch, int32_max);
|
||||
break;
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
*pi = prop.textureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTexturePitchAlignment:
|
||||
*pi = prop.texturePitchAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
*pi = prop.kernelExecTimeoutEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCanMapHostMemory:
|
||||
*pi = prop.canMapHostMemory;
|
||||
break;
|
||||
case hipDeviceAttributeCanUseHostPointerForRegisteredMem:
|
||||
*pi = prop.canUseHostPointerForRegisteredMem;
|
||||
break;
|
||||
case hipDeviceAttributeEccEnabled:
|
||||
*pi = prop.ECCEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
|
||||
break;
|
||||
case hipDeviceAttributeAsicRevision:
|
||||
*pi = prop.asicRevision;
|
||||
break;
|
||||
case hipDeviceAttributeManagedMemory:
|
||||
*pi = prop.managedMemory;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlocksPerMultiProcessor:
|
||||
*pi = prop.maxBlocksPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeDirectManagedMemAccessFromHost:
|
||||
*pi = prop.directManagedMemAccessFromHost;
|
||||
break;
|
||||
case hipDeviceAttributeGlobalL1CacheSupported:
|
||||
*pi = prop.globalL1CacheSupported;
|
||||
break;
|
||||
case hipDeviceAttributeHostNativeAtomicSupported:
|
||||
*pi = prop.hostNativeAtomicSupported;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentManagedAccess:
|
||||
*pi = prop.concurrentManagedAccess;
|
||||
break;
|
||||
case hipDeviceAttributePageableMemoryAccess:
|
||||
*pi = prop.pageableMemoryAccess;
|
||||
break;
|
||||
case hipDeviceAttributePageableMemoryAccessUsesHostPageTables:
|
||||
*pi = prop.pageableMemoryAccessUsesHostPageTables;
|
||||
break;
|
||||
case hipDeviceAttributeIsLargeBar:
|
||||
*pi = prop.isLargeBar;
|
||||
break;
|
||||
case hipDeviceAttributeUnifiedAddressing:
|
||||
// HIP runtime always uses SVM for host memory allocations.
|
||||
// Note: Host registered memory isn't covered by this feature
|
||||
// and still requires hipMemHostGetDevicePointer() call
|
||||
*pi = true;
|
||||
break;
|
||||
case hipDeviceAttributeCanUseStreamWaitValue:
|
||||
// hipStreamWaitValue64() and hipStreamWaitValue32() support
|
||||
*pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_;
|
||||
break;
|
||||
case hipDeviceAttributeImageSupport:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().imageSupport_);
|
||||
break;
|
||||
case hipDeviceAttributePhysicalMultiProcessorCount:
|
||||
*pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_;
|
||||
break;
|
||||
case hipDeviceAttributeFineGrainSupport:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->isFineGrainSupported());
|
||||
break;
|
||||
case hipDeviceAttributeMemoryPoolsSupported:
|
||||
*pi = HIP_MEM_POOL_SUPPORT;
|
||||
break;
|
||||
case hipDeviceAttributeVirtualMemoryManagementSupported:
|
||||
*pi = static_cast<int>(g_devices[device]->devices()[0]->info().virtualMemoryManagement_);
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
|
||||
|
||||
hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusIdstr) {
|
||||
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr);
|
||||
|
||||
if (device == nullptr || pciBusIdstr == nullptr) {
|
||||
@@ -369,9 +433,9 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
bool found = false;
|
||||
if (sscanf (pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast<unsigned int*>(&pciDomainID),
|
||||
reinterpret_cast<unsigned int*>(&pciBusID),
|
||||
reinterpret_cast<unsigned int*>(&pciDeviceID)) == 0x3) {
|
||||
if (sscanf(pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast<unsigned int*>(&pciDomainID),
|
||||
reinterpret_cast<unsigned int*>(&pciBusID),
|
||||
reinterpret_cast<unsigned int*>(&pciDeviceID)) == 0x3) {
|
||||
int count = 0;
|
||||
HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count));
|
||||
for (cl_int i = 0; i < count; i++) {
|
||||
@@ -380,8 +444,8 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
|
||||
HIP_RETURN_ONFAIL(ihipDeviceGet(&dev, i));
|
||||
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, dev));
|
||||
|
||||
if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID)
|
||||
&& (pciDeviceID == prop.pciDeviceID)) {
|
||||
if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID) &&
|
||||
(pciDeviceID == prop.pciDeviceID)) {
|
||||
*device = i;
|
||||
found = true;
|
||||
break;
|
||||
@@ -395,10 +459,10 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
|
||||
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig) {
|
||||
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
|
||||
|
||||
if(cacheConfig == nullptr) {
|
||||
if (cacheConfig == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -407,8 +471,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
|
||||
|
||||
hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
|
||||
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
|
||||
|
||||
if (pValue == nullptr || limit >= hipLimitRange) {
|
||||
@@ -431,8 +494,7 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
|
||||
|
||||
hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device) {
|
||||
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
|
||||
|
||||
int count;
|
||||
@@ -442,24 +504,19 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
//pciBusId should be large enough to store 13 characters including the NULL-terminator.
|
||||
// pciBusId should be large enough to store 13 characters including the NULL-terminator.
|
||||
if (pciBusId == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device));
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
snprintf (pciBusId, len, "%04x:%02x:%02x.%01x",
|
||||
prop.pciDomainID,
|
||||
prop.pciBusID,
|
||||
prop.pciDeviceID,
|
||||
deviceHandle->info().deviceTopology_.pcie.function);
|
||||
snprintf(pciBusId, len, "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
|
||||
|
||||
HIP_RETURN(len <= 12 ? hipErrorInvalidValue : hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
|
||||
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig) {
|
||||
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
|
||||
if (pConfig == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -469,7 +526,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceReset ( void ) {
|
||||
hipError_t hipDeviceReset(void) {
|
||||
HIP_INIT_API(hipDeviceReset);
|
||||
|
||||
hip::getCurrentDevice()->Reset();
|
||||
@@ -477,7 +534,7 @@ hipError_t hipDeviceReset ( void ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
|
||||
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
|
||||
|
||||
// No way to set cache config yet.
|
||||
@@ -485,34 +542,33 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) {
|
||||
hipError_t hipDeviceSetLimit(hipLimit_t limit, size_t value) {
|
||||
HIP_INIT_API(hipDeviceSetLimit, limit, value);
|
||||
if (limit >= hipLimitRange) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
switch(limit) {
|
||||
case hipLimitStackSize :
|
||||
// need to query device size and take action
|
||||
if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
break;
|
||||
case hipLimitMallocHeapSize:
|
||||
if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LogPrintfError("UnsupportedLimit = %d is passed", limit);
|
||||
HIP_RETURN(hipErrorUnsupportedLimit);
|
||||
switch (limit) {
|
||||
case hipLimitStackSize:
|
||||
// need to query device size and take action
|
||||
if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
break;
|
||||
case hipLimitMallocHeapSize:
|
||||
if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LogPrintfError("UnsupportedLimit = %d is passed", limit);
|
||||
HIP_RETURN(hipErrorUnsupportedLimit);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
|
||||
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
|
||||
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
|
||||
if (config != hipSharedMemBankSizeDefault &&
|
||||
config != hipSharedMemBankSizeFourByte &&
|
||||
if (config != hipSharedMemBankSizeDefault && config != hipSharedMemBankSizeFourByte &&
|
||||
config != hipSharedMemBankSizeEightByte) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -530,13 +586,13 @@ hipError_t hipDeviceSynchronize() {
|
||||
|
||||
int ihipGetDevice() {
|
||||
hip::Device* device = hip::getCurrentDevice();
|
||||
if(device == nullptr){
|
||||
if (device == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
return device->deviceId();
|
||||
}
|
||||
|
||||
hipError_t hipGetDevice ( int* deviceId ) {
|
||||
hipError_t hipGetDevice(int* deviceId) {
|
||||
HIP_INIT_API(hipGetDevice, deviceId);
|
||||
|
||||
if (deviceId != nullptr) {
|
||||
@@ -551,13 +607,13 @@ hipError_t hipGetDevice ( int* deviceId ) {
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceCount ( int* count ) {
|
||||
hipError_t hipGetDeviceCount(int* count) {
|
||||
HIP_INIT_API_NO_RETURN(hipGetDeviceCount, count);
|
||||
|
||||
HIP_RETURN(ihipDeviceGetCount(count));
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
|
||||
hipError_t hipGetDeviceFlags(unsigned int* flags) {
|
||||
HIP_INIT_API(hipGetDeviceFlags, flags);
|
||||
if (flags == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -566,7 +622,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipSetDevice ( int device ) {
|
||||
hipError_t hipSetDevice(int device) {
|
||||
HIP_INIT_API_NO_RETURN(hipSetDevice, device);
|
||||
if (static_cast<unsigned int>(device) < g_devices.size()) {
|
||||
hip::setCurrentDevice(device);
|
||||
@@ -578,7 +634,7 @@ hipError_t hipSetDevice ( int device ) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
hipError_t hipSetDeviceFlags(unsigned int flags) {
|
||||
HIP_INIT_API(hipSetDeviceFlags, flags);
|
||||
if (g_devices.empty()) {
|
||||
HIP_RETURN(hipErrorNoDevice);
|
||||
@@ -590,9 +646,10 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
// Only one scheduling flag allowed a time
|
||||
uint32_t scheduleFlag = flags & hipDeviceScheduleMask;
|
||||
|
||||
if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield)
|
||||
&& ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync)
|
||||
&& ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) {
|
||||
if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) &&
|
||||
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield) &&
|
||||
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync) &&
|
||||
((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -625,7 +682,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipSetValidDevices ( int* device_arr, int len ) {
|
||||
hipError_t hipSetValidDevices(int* device_arr, int len) {
|
||||
HIP_INIT_API(hipSetValidDevices, device_arr, len);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
@@ -525,4 +525,11 @@ global:
|
||||
hipArray3DGetDescriptor;
|
||||
local:
|
||||
*;
|
||||
} hip_5.5;
|
||||
} hip_5.5;
|
||||
|
||||
hip_6.0 {
|
||||
global:
|
||||
hipGetDevicePropertiesR0600;
|
||||
local:
|
||||
*;
|
||||
} hip_5.6;
|
||||
@@ -151,7 +151,7 @@ hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr,
|
||||
// Linear layout access is supported if P2P is enabled
|
||||
// Opaque Images are supported only on homogeneous systems
|
||||
// Might have more conditions to check, in future.
|
||||
if (srcDeviceProp.gcnArch == dstDeviceProp.gcnArch) {
|
||||
if (std::string(srcDeviceProp.gcnArchName) == std::string(dstDeviceProp.gcnArchName)) {
|
||||
HIP_RETURN_ONFAIL(canAccessPeer(value, srcDevice, dstDevice));
|
||||
} else {
|
||||
*value = 0;
|
||||
|
||||
@@ -76,7 +76,12 @@ bool RTCProgram::findIsa() {
|
||||
}
|
||||
|
||||
void* sym_hipGetDevice = amd::Os::getSymbol(handle, "hipGetDevice");
|
||||
void* sym_hipGetDeviceProperties = amd::Os::getSymbol(handle, "hipGetDeviceProperties");
|
||||
void* sym_hipGetDeviceProperties =
|
||||
amd::Os::getSymbol(handle, "hipGetDevicePropertiesR0600"); // Try to find the new symbol
|
||||
if (sym_hipGetDeviceProperties == nullptr) {
|
||||
sym_hipGetDeviceProperties =
|
||||
amd::Os::getSymbol(handle, "hipGetDeviceProperties"); // Fall back to old one
|
||||
}
|
||||
|
||||
if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) {
|
||||
LogInfo("ISA cannot be found to dlsym failure");
|
||||
|
||||
@@ -344,6 +344,9 @@ struct Info : public amd::EmbeddedObject {
|
||||
//! Max width of 2D image in pixels.
|
||||
size_t image2DMaxWidth_;
|
||||
|
||||
//! Max width of 2DA image in pixels.
|
||||
size_t image2DAMaxWidth_[2];
|
||||
|
||||
//! Max height of 2D image in pixels.
|
||||
size_t image2DMaxHeight_;
|
||||
|
||||
@@ -480,6 +483,9 @@ struct Info : public amd::EmbeddedObject {
|
||||
//! Returns max number of pixels for a 1D image
|
||||
size_t image1DMaxWidth_;
|
||||
|
||||
//! Returns max number of pixels for a 1DA image
|
||||
size_t image1DAMaxWidth_;
|
||||
|
||||
//! Returns max number of pixels for a 1D image created from a buffer object
|
||||
size_t imageMaxBufferSize_;
|
||||
|
||||
@@ -1526,7 +1532,6 @@ class Isa {
|
||||
uint32_t memChannelBankWidth_; //!< Memory channel bank width.
|
||||
uint32_t localMemSizePerCU_; //!< Local memory size per CU.
|
||||
uint32_t localMemBanks_; //!< Number of banks of local memory.
|
||||
|
||||
}; // class Isa
|
||||
|
||||
/*! \addtogroup Runtime
|
||||
|
||||
@@ -563,6 +563,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
info_.maxMemAllocSize_ / kPixelRgbaSize);
|
||||
info_.image1DMaxWidth_ = maxTextureSize;
|
||||
info_.imageMaxArraySize_ = MaxImageArraySize;
|
||||
info_.image2DAMaxWidth_[0] = MaxImageArraySize;
|
||||
info_.image2DAMaxWidth_[1] = MaxImageArraySize;
|
||||
info_.image1DAMaxWidth_ = maxTextureSize;
|
||||
info_.preferredInteropUserSync_ = true;
|
||||
info_.printfBufferSize_ = PrintfDbg::WorkitemDebugSize * info().maxWorkGroupSize_;
|
||||
|
||||
|
||||
@@ -1521,6 +1521,27 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
info_.imageMaxArraySize_ = max_array_size;
|
||||
|
||||
uint32_t max_image1da_width = 0;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(bkendDevice_,
|
||||
static_cast<hsa_agent_info_t>(HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS),
|
||||
&max_image1da_width)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
info_.image1DAMaxWidth_ = max_image1da_width;
|
||||
|
||||
uint32_t max_image2da_width[2] = {0, 0};
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(bkendDevice_,
|
||||
static_cast<hsa_agent_info_t>(HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS),
|
||||
&max_image2da_width)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
info_.image2DAMaxWidth_[0] = max_image2da_width[0];
|
||||
info_.image2DAMaxWidth_[1] = max_image2da_width[1];
|
||||
|
||||
uint32_t max_image1d_width = 0;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(bkendDevice_,
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача