Fix typo: maxThreadsPerMultiProcessor -> MaxSharedMemoryPerMultiprocessor

Device property MaxSharedMemoryPerMultiprocessor set equal to totalGlobalMem (HIP path).
Reason: MaxSharedMemoryPerMultiprocessor should be as the same as group memory size. Group memory will not be paged out, so, the physical memory size = total shared memory size = group region size. NVCC path remains untouched: CUDA's device property MaxSharedMemoryPerMultiprocessor is reported.

hipify is updated as well.


[ROCm/clr commit: 460b501cbb]
This commit is contained in:
Evgeny Mankov
2016-02-12 01:29:20 +03:00
vanhempi 735d4738ad
commit 6add51ef8c
5 muutettua tiedostoa jossa 66 lisäystä ja 43 poistoa
+3
Näytä tiedosto
@@ -364,6 +364,9 @@ while (@ARGV) {
$ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g;
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g;
$ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g;
$ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g;
$ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g;
$ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g;
$ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g;
$ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g;
@@ -72,27 +72,28 @@ typedef struct {
*
*/
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name.
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int multiProcessorCount; ///< Number of multi-processors (compute units).
int l2CacheSize; ///< L2 cache size.
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int computeMode; ///< Compute mode.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
char name[256]; ///< Device name.
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
int regsPerBlock; ///< Registers per block.
int warpSize; ///< Warp size.
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int multiProcessorCount; ///< Number of multi-processors (compute units).
int l2CacheSize; ///< L2 cache size.
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
int computeMode; ///< Compute mode.
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
int pciBusID; ///< PCI Bus ID.
int pciDeviceID; ///< PCI Device ID.
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
} hipDeviceProp_t;
@@ -130,26 +131,27 @@ typedef enum hipError_t {
* @ingroup Enumerations
*/
typedef enum hipDeviceAttribute_t {
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block.
hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block.
hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block.
hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid.
hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid.
hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block.
hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block.
hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block.
hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid.
hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid.
hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes.
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per Multiprocessor.
} hipDeviceAttribute_t;
/**
@@ -256,6 +256,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
cdattr = cudaDevAttrPciBusId; break;
case hipDeviceAttributePciDeviceId:
cdattr = cudaDevAttrPciDeviceId; break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; break;
default:
cerror = cudaErrorInvalidValue; break;
}
+17 -2
Näytä tiedosto
@@ -342,6 +342,19 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
Default compute mode (Multiple threads can use cudaSetDevice() with this device) */
prop->computeMode = 0;
// Get Max Threads Per Multiprocessor
/*
HsaSystemProperties props;
hsaKmtReleaseSystemProperties();
if(HSAKMT_STATUS_SUCCESS == hsaKmtAcquireSystemProperties(&props)) {
HsaNodeProperties node_prop = {0};
if(HSAKMT_STATUS_SUCCESS == hsaKmtGetNodeProperties(node, &node_prop)) {
uint32_t waves_per_cu = node_prop.MaxWavesPerSIMD;
prop-> maxThreadsPerMultiProcessor = prop->warpsize*waves_per_cu;
}
}
*/
// Get memory properties
err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop);
@@ -351,9 +364,9 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
DeviceErrorCheck(err);
// maxThreadsPerMultiProcessor should be as the same as group memory size.
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
prop->maxThreadsPerMultiProcessor = prop->totalGlobalMem;
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
// Set feature flags - these are all mandatory for HIP on HCC path:
// Some features are under-development and future revs may support flags that are currently 0.
@@ -841,6 +854,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*pi = prop->pciBusID; break;
case hipDeviceAttributePciDeviceId:
*pi = prop->pciDeviceID; break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
*pi = prop->maxSharedMemoryPerMultiProcessor; break;
default:
e = hipErrorInvalidValue; break;
}
@@ -75,6 +75,7 @@ int main(int argc, char *argv[])
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));
passed();
};