Device property memoryClockRate implementation.

+ Device property memoryClockRate is added to the hipDeviceProp_t struct. + Device attribute hipDeviceAttributeMemoryClockRate is added to the hipDeviceAttribute_t enum. + Tests are updated. + hipDevAttrConcurrentKernels is renamed to hipDeviceAttributeConcurrentKernels.
2016-02-18 17:25:28 +03:00
@@ -80,7 +80,8 @@ typedef struct hipDeviceProp_t {
    int maxThreadsPerBlock;                     ///< Max work items per work group or workgroup max size.
    int maxThreadsDim[3];                       ///< Max number of threads in each dimension (XYZ) of a block.
    int maxGridSize[3];                         ///< Max grid dimensions (XYZ).
-    int clockRate;                              ///< Max clock frequency of the multiProcessors, in khz.
+    int clockRate;                              ///< Max clock frequency of the multiProcessors in khz.
+    int memoryClockRate;                        ///< Max memory clock frequency in khz.
    size_t totalConstMem;                       ///< Size of shared memory region (in bytes).
    int major;                                  ///< Major compute capability.  On HCC, this is an approximation and features may differ from CUDA CC.  See the arch feature flags for portable ways to query feature caps.
    int minor;                                  ///< Minor compute capability.  On HCC, this is an approximation and features may differ from CUDA CC.  See the arch feature flags for portable ways to query feature caps.
@@ -143,13 +144,14 @@ typedef enum hipDeviceAttribute_t {
    hipDeviceAttributeWarpSize,                             ///< Warp size in threads.
    hipDeviceAttributeMaxRegistersPerBlock,                 ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
    hipDeviceAttributeClockRate,                            ///< Peak clock frequency in kilohertz.
+    hipDeviceAttributeMemoryClockRate,                      ///< Peak memory clock frequency in kilohertz.
    hipDeviceAttributeMultiprocessorCount,                  ///< Number of multiprocessors on the device.
    hipDeviceAttributeComputeMode,                          ///< Compute mode that device is currently in.
    hipDeviceAttributeL2CacheSize,                          ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
    hipDeviceAttributeMaxThreadsPerMultiProcessor,          ///< Maximum resident threads per multiprocessor.
    hipDeviceAttributeComputeCapabilityMajor,               ///< Major compute capability version number.
    hipDeviceAttributeComputeCapabilityMinor,               ///< Minor compute capability version number.
-    hipDevAttrConcurrentKernels,                            ///< Device can possibly execute multiple kernels concurrently.
+    hipDeviceAttributeConcurrentKernels,                    ///< Device can possibly execute multiple kernels concurrently.
    hipDeviceAttributePciBusId,                             ///< PCI Bus ID.
    hipDeviceAttributePciDeviceId,                          ///< PCI Device ID.
    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,     ///< Maximum Shared Memory Per Multiprocessor.
@@ -242,6 +242,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
        cdattr = cudaDevAttrMaxRegistersPerBlock; break;
    case hipDeviceAttributeClockRate:
        cdattr = cudaDevAttrClockRate; break;
+    case hipDeviceAttributeMemoryClockRate:
+        cdattr = cudaDevAttrMemoryClockRate; break;
    case hipDeviceAttributeMultiprocessorCount:
        cdattr = cudaDevAttrMultiProcessorCount; break;
    case hipDeviceAttributeComputeMode:
@@ -252,7 +254,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
        cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break;
    case hipDeviceAttributeComputeCapabilityMajor:
        cdattr = cudaDevAttrComputeCapabilityMajor; break;
-    case hipDevAttrConcurrentKernels:
+    case hipDeviceAttributeConcurrentKernels:
        cdattr = cudaDevAttrConcurrentKernels; break;
    case hipDeviceAttributePciBusId:
        cdattr = cudaDevAttrPciBusId; break;
@@ -81,7 +81,8 @@ void printDeviceProp (int deviceId)
    cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
    cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
    cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
-    cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0<< " Mhz" << endl;
+    cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
+    cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
    cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl;
    cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl;
    cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl;
@@ -362,12 +362,17 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)

    // Get the size of the region we are using for Accelerator Memory allocations:
    hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
-    err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
+    err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
    DeviceErrorCheck(err);
    // maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
    // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
    prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;

+    // Get max memory clock frequency; validate the query before scaling its result.
+    err = hsa_region_get_info(*am_region, static_cast<hsa_region_info_t>(HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY), &prop->memoryClockRate);
+    DeviceErrorCheck(err);
+    prop->memoryClockRate *= 1000;   // convert Mhz to Khz (integer math, the field is an int).
+
    // Set feature flags - these are all mandatory for HIP on HCC path:
    // Some features are under-development and future revs may support flags that are currently 0.
    // Reporting of these flags should be synchronized with the HIP_ARCH* compile-time defines in hip_runtime.h
@@ -838,6 +843,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
            *pi = prop->regsPerBlock; break;
        case hipDeviceAttributeClockRate:
            *pi = prop->clockRate; break;
+        case hipDeviceAttributeMemoryClockRate:
+            *pi = prop->memoryClockRate; break;
        case hipDeviceAttributeMultiprocessorCount:
            *pi = prop->multiProcessorCount; break;
        case hipDeviceAttributeComputeMode:
@@ -852,7 +859,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
            *pi = prop->minor; break;
        case hipDeviceAttributePciBusId:
            *pi = prop->pciBusID; break;
-        case hipDevAttrConcurrentKernels:
+        case hipDeviceAttributeConcurrentKernels:
            *pi = prop->concurrentKernels; break;
        case hipDeviceAttributePciDeviceId:
            *pi = prop->pciDeviceID; break;
@@ -67,13 +67,14 @@ int main(int argc, char *argv[])
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
+    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
-    CHECK(test_hipDeviceGetAttribute(deviceId, hipDevAttrConcurrentKernels, props.concurrentKernels));
+    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeConcurrentKernels, props.concurrentKernels));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));