diff --git a/projects/clr/hipamd/bin/hipify b/projects/clr/hipamd/bin/hipify
index f4de89aab2..d143bdff37 100755
--- a/projects/clr/hipamd/bin/hipify
+++ b/projects/clr/hipamd/bin/hipify
@@ -364,6 +364,9 @@ while (@ARGV) {
     $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g;
     $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g;
     $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g;
+    $ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g;
+    $ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g;
+    $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g;
 
     $ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g;
     $ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g;
diff --git a/projects/clr/hipamd/include/hip_runtime_api.h b/projects/clr/hipamd/include/hip_runtime_api.h
index ca02197ac1..882103a1f4 100644
--- a/projects/clr/hipamd/include/hip_runtime_api.h
+++ b/projects/clr/hipamd/include/hip_runtime_api.h
@@ -72,27 +72,28 @@ typedef struct {
  *
  */
 typedef struct hipDeviceProp_t {
-    char name[256]; ///< Device name.
-    size_t totalGlobalMem; ///< Size of global memory region (in bytes).
-    size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
-    int regsPerBlock; ///< Registers per block.
-    int warpSize; ///< Warp size.
-    int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
-    int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
-    int maxGridSize[3]; ///< Max grid dimensions (XYZ).
-    int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
-    size_t totalConstMem; ///< Size of shared memory region (in bytes).
-    int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
-    int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
-    int multiProcessorCount; ///< Number of multi-processors (compute units).
-    int l2CacheSize; ///< L2 cache size.
-    int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
-    int computeMode; ///< Compute mode.
-    int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
-    hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
-    int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
-    int pciBusID; ///< PCI Bus ID.
-    int pciDeviceID; ///< PCI Device ID.
+    char name[256];  ///< Device name.
+    size_t totalGlobalMem;  ///< Size of global memory region (in bytes).
+    size_t sharedMemPerBlock;  ///< Size of shared memory region (in bytes).
+    int regsPerBlock;  ///< Registers per block.
+    int warpSize;  ///< Warp size.
+    int maxThreadsPerBlock;  ///< Max work items per work group or workgroup max size.
+    int maxThreadsDim[3];  ///< Max number of threads in each dimension (XYZ) of a block.
+    int maxGridSize[3];  ///< Max grid dimensions (XYZ).
+    int clockRate;  ///< Max clock frequency of the multiProcessors, in khz.
+    size_t totalConstMem;  ///< Size of constant memory region (in bytes).
+    int major;  ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
+    int minor;  ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
+    int multiProcessorCount;  ///< Number of multi-processors (compute units).
+    int l2CacheSize;  ///< L2 cache size.
+    int maxThreadsPerMultiProcessor;  ///< Maximum resident threads per multi-processor.
+    int computeMode;  ///< Compute mode.
+    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
+    hipDeviceArch_t arch;  ///< Architectural feature flags. New for HIP.
+    int concurrentKernels;  ///< Device can possibly execute multiple kernels concurrently.
+    int pciBusID;  ///< PCI Bus ID.
+    int pciDeviceID;  ///< PCI Device ID.
+    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum shared memory available per multiprocessor (in bytes).
 } hipDeviceProp_t;
 
 
@@ -130,26 +131,27 @@ typedef enum hipError_t {
  * @ingroup Enumerations
  */
 typedef enum hipDeviceAttribute_t {
-    hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
-    hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block.
-    hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block.
-    hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block.
-    hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid.
-    hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid.
-    hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid.
-    hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
-    hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
-    hipDeviceAttributeWarpSize, ///< Warp size in threads.
-    hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
-    hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
-    hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
-    hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
-    hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
-    hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
-    hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
-    hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
-    hipDeviceAttributePciBusId, ///< PCI Bus ID.
-    hipDeviceAttributePciDeviceId, ///< PCI Device ID.
+    hipDeviceAttributeMaxThreadsPerBlock,  ///< Maximum number of threads per block.
+    hipDeviceAttributeMaxBlockDimX,  ///< Maximum x-dimension of a block.
+    hipDeviceAttributeMaxBlockDimY,  ///< Maximum y-dimension of a block.
+    hipDeviceAttributeMaxBlockDimZ,  ///< Maximum z-dimension of a block.
+    hipDeviceAttributeMaxGridDimX,  ///< Maximum x-dimension of a grid.
+    hipDeviceAttributeMaxGridDimY,  ///< Maximum y-dimension of a grid.
+    hipDeviceAttributeMaxGridDimZ,  ///< Maximum z-dimension of a grid.
+    hipDeviceAttributeMaxSharedMemoryPerBlock,  ///< Maximum shared memory available per block in bytes.
+    hipDeviceAttributeTotalConstantMemory,  ///< Constant memory size in bytes.
+    hipDeviceAttributeWarpSize,  ///< Warp size in threads.
+    hipDeviceAttributeMaxRegistersPerBlock,  ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
+    hipDeviceAttributeClockRate,  ///< Peak clock frequency in kilohertz.
+    hipDeviceAttributeMultiprocessorCount,  ///< Number of multiprocessors on the device.
+    hipDeviceAttributeComputeMode,  ///< Compute mode that device is currently in.
+    hipDeviceAttributeL2CacheSize,  ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
+    hipDeviceAttributeMaxThreadsPerMultiProcessor,  ///< Maximum resident threads per multiprocessor.
+    hipDeviceAttributeComputeCapabilityMajor,  ///< Major compute capability version number.
+    hipDeviceAttributeComputeCapabilityMinor,  ///< Minor compute capability version number.
+    hipDeviceAttributePciBusId,  ///< PCI Bus ID.
+    hipDeviceAttributePciDeviceId,  ///< PCI Device ID.
+    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,  ///< Maximum shared memory available per multiprocessor in bytes.
 } hipDeviceAttribute_t;
 
 /**
diff --git a/projects/clr/hipamd/include/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/nvcc_detail/hip_runtime_api.h
index 63e3c9983b..f84de73872 100644
--- a/projects/clr/hipamd/include/nvcc_detail/hip_runtime_api.h
+++ b/projects/clr/hipamd/include/nvcc_detail/hip_runtime_api.h
@@ -256,6 +256,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
             cdattr = cudaDevAttrPciBusId; break;
         case hipDeviceAttributePciDeviceId:
             cdattr = cudaDevAttrPciDeviceId; break;
+        case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
+            cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; break;
         default:
             cerror = cudaErrorInvalidValue; break;
     }
diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp
index 47273328bf..bdfbdb230b 100644
--- a/projects/clr/hipamd/src/hip_hcc.cpp
+++ b/projects/clr/hipamd/src/hip_hcc.cpp
@@ -342,6 +342,19 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
     Default compute mode (Multiple threads can use cudaSetDevice() with this device)
     */
     prop->computeMode = 0;
+    // Get Max Threads Per Multiprocessor -- TODO: the query below is disabled, so maxThreadsPerMultiProcessor is no longer assigned by this code.
+/*
+    HsaSystemProperties props;
+    hsaKmtReleaseSystemProperties();
+    if(HSAKMT_STATUS_SUCCESS == hsaKmtAcquireSystemProperties(&props)) {
+        HsaNodeProperties node_prop = {0};
+        if(HSAKMT_STATUS_SUCCESS == hsaKmtGetNodeProperties(node, &node_prop)) {
+            uint32_t waves_per_cu = node_prop.MaxWavesPerSIMD; // NOTE(review): field name says per-SIMD, not per-CU -- confirm before enabling
+            prop->maxThreadsPerMultiProcessor = prop->warpSize * waves_per_cu;
+        }
+    }
+*/
+
     // Get memory properties
     err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop);
 
@@ -351,9 +364,9 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
     hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
     err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
     DeviceErrorCheck(err);
-    // maxThreadsPerMultiProcessor should be as the same as group memory size.
+    // FIXME(review): maxSharedMemoryPerMultiProcessor should be the same as the group memory size, but the value assigned below is totalGlobalMem (the am_region size read above).
     // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
-    prop->maxThreadsPerMultiProcessor = prop->totalGlobalMem;
+    prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
 
     // Set feature flags - these are all mandatory for HIP on HCC path:
     // Some features are under-development and future revs may support flags that are currently 0.
@@ -841,6 +854,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
             *pi = prop->pciBusID; break;
         case hipDeviceAttributePciDeviceId:
             *pi = prop->pciDeviceID; break;
+        case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
+            *pi = static_cast<int>(prop->maxSharedMemoryPerMultiProcessor); break; // narrows size_t to int; values above INT_MAX are not representable
         default:
             e = hipErrorInvalidValue; break;
         }
diff --git a/projects/clr/hipamd/tests/src/hipGetDeviceAttribute.cpp b/projects/clr/hipamd/tests/src/hipGetDeviceAttribute.cpp
index 6dc8861159..30fac8c1b4 100644
--- a/projects/clr/hipamd/tests/src/hipGetDeviceAttribute.cpp
+++ b/projects/clr/hipamd/tests/src/hipGetDeviceAttribute.cpp
@@ -75,6 +75,7 @@ int main(int argc, char *argv[])
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
+    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));
 
     passed();
 };