Device property memoryClockRate implementation.

+ Device property memoryClockRate is added to hipDeviceProp_t struct.
+ Device attribute hipDeviceAttributeMemoryClockRate is added to hipDeviceAttribute_t enum.
+ Tests updated to cover hipDeviceAttributeMemoryClockRate and the renamed hipDeviceAttributeConcurrentKernels.
+ Rename hipDevAttrConcurrentKernels to hipDeviceAttributeConcurrentKernels.
此提交包含在:
Evgeny Mankov
2016-02-18 17:25:28 +03:00
父節點 859208d6f0
當前提交 8aace64dce
共有 5 個檔案被更改,包括 20 行新增和 7 行刪除
+4 -2
查看文件
@@ -80,7 +80,8 @@ typedef struct hipDeviceProp_t {
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
int memoryClockRate; ///< Max memory clock frequency in khz.
size_t totalConstMem; ///< Size of shared memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
@@ -143,13 +144,14 @@ typedef enum hipDeviceAttribute_t {
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDevAttrConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per Multiprocessor.
+3 -1
查看文件
@@ -242,6 +242,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
cdattr = cudaDevAttrMaxRegistersPerBlock; break;
case hipDeviceAttributeClockRate:
cdattr = cudaDevAttrClockRate; break;
case hipDeviceAttributeMemoryClockRate:
cdattr = cudaDevAttrMemoryClockRate:; break;
case hipDeviceAttributeMultiprocessorCount:
cdattr = cudaDevAttrMultiProcessorCount; break;
case hipDeviceAttributeComputeMode:
@@ -252,7 +254,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break;
case hipDeviceAttributeComputeCapabilityMajor:
cdattr = cudaDevAttrComputeCapabilityMajor; break;
case hipDevAttrConcurrentKernels:
case hipDeviceAttributeConcurrentKernels:
cdattr = cudaDevAttrConcurrentKernels; break;
case hipDeviceAttributePciBusId:
cdattr = cudaDevAttrPciBusId; break;
+2 -1
查看文件
@@ -81,7 +81,8 @@ void printDeviceProp (int deviceId)
cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0<< " Mhz" << endl;
cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl;
cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl;
cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl;
+9 -2
查看文件
@@ -362,12 +362,17 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
// Get the size of the region we are using for Accelerator Memory allocations:
hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
DeviceErrorCheck(err);
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
// Get Max memory clock frequency
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
DeviceErrorCheck(err);
// Set feature flags - these are all mandatory for HIP on HCC path:
// Some features are under-development and future revs may support flags that are currently 0.
// Reporting of these flags should be synchronized with the HIP_ARCH* compile-time defines in hip_runtime.h
@@ -838,6 +843,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*pi = prop->regsPerBlock; break;
case hipDeviceAttributeClockRate:
*pi = prop->clockRate; break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop->memoryClockRate; break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop->multiProcessorCount; break;
case hipDeviceAttributeComputeMode:
@@ -852,7 +859,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*pi = prop->minor; break;
case hipDeviceAttributePciBusId:
*pi = prop->pciBusID; break;
case hipDevAttrConcurrentKernels:
case hipDeviceAttributeConcurrentKernels:
*pi = prop->concurrentKernels; break;
case hipDeviceAttributePciDeviceId:
*pi = prop->pciDeviceID; break;
+2 -1
查看文件
@@ -67,13 +67,14 @@ int main(int argc, char *argv[])
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDevAttrConcurrentKernels, props.concurrentKernels));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeConcurrentKernels, props.concurrentKernels));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));