diff --git a/hipamd/include/hip_runtime_api.h b/hipamd/include/hip_runtime_api.h
index d754862544..5191bc5d54 100644
--- a/hipamd/include/hip_runtime_api.h
+++ b/hipamd/include/hip_runtime_api.h
@@ -80,7 +80,8 @@ typedef struct hipDeviceProp_t {
     int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
     int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
     int maxGridSize[3]; ///< Max grid dimensions (XYZ).
-    int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
+    int clockRate; ///< Max clock frequency of the multiProcessors in kHz.
+    int memoryClockRate; ///< Max memory clock frequency in kHz.
     size_t totalConstMem; ///< Size of shared memory region (in bytes).
     int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
     int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
@@ -143,13 +144,14 @@ typedef enum hipDeviceAttribute_t {
     hipDeviceAttributeWarpSize, ///< Warp size in threads.
     hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
     hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
+    hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
     hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
     hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
     hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
     hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
     hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
     hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
-    hipDevAttrConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
+    hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
     hipDeviceAttributePciBusId, ///< PCI Bus ID.
     hipDeviceAttributePciDeviceId, ///< PCI Device ID.
     hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per Multiprocessor.
diff --git a/hipamd/include/nvcc_detail/hip_runtime_api.h b/hipamd/include/nvcc_detail/hip_runtime_api.h
index 83f2d59646..7a1e9bc6e9 100644
--- a/hipamd/include/nvcc_detail/hip_runtime_api.h
+++ b/hipamd/include/nvcc_detail/hip_runtime_api.h
@@ -242,6 +242,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
             cdattr = cudaDevAttrMaxRegistersPerBlock; break;
         case hipDeviceAttributeClockRate:
             cdattr = cudaDevAttrClockRate; break;
+        case hipDeviceAttributeMemoryClockRate:
+            cdattr = cudaDevAttrMemoryClockRate; break;
         case hipDeviceAttributeMultiprocessorCount:
             cdattr = cudaDevAttrMultiProcessorCount; break;
         case hipDeviceAttributeComputeMode:
@@ -252,7 +254,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
             cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break;
         case hipDeviceAttributeComputeCapabilityMajor:
             cdattr = cudaDevAttrComputeCapabilityMajor; break;
-        case hipDevAttrConcurrentKernels:
+        case hipDeviceAttributeConcurrentKernels:
             cdattr = cudaDevAttrConcurrentKernels; break;
         case hipDeviceAttributePciBusId:
             cdattr = cudaDevAttrPciBusId; break;
diff --git a/hipamd/samples/1_Utils/hipInfo/hipInfo.cpp b/hipamd/samples/1_Utils/hipInfo/hipInfo.cpp
index c8979b1cc1..18d9176a07 100644
--- a/hipamd/samples/1_Utils/hipInfo/hipInfo.cpp
+++ b/hipamd/samples/1_Utils/hipInfo/hipInfo.cpp
@@ -81,7 +81,8 @@ void printDeviceProp (int deviceId)
     cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
     cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
     cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
-    cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0<< " Mhz" << endl;
+    cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
+    cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
     cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl;
     cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl;
     cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl;
diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp
index 1ce3f4a5bb..beba7c2775 100644
--- a/hipamd/src/hip_hcc.cpp
+++ b/hipamd/src/hip_hcc.cpp
@@ -362,12 +362,17 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
 
     // Get the size of the region we are using for Accelerator Memory allocations:
     hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
-    err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
+    err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
     DeviceErrorCheck(err);
     // maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
     // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
     prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
 
+    // Get max memory clock frequency; check the query status before touching the result.
+    err = hsa_region_get_info(*am_region, static_cast<hsa_region_info_t>(HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY), &prop->memoryClockRate);
+    DeviceErrorCheck(err);
+    prop->memoryClockRate *= 1000; // convert MHz to kHz (integer scale, the field is an int).
+
     // Set feature flags - these are all mandatory for HIP on HCC path:
     // Some features are under-development and future revs may support flags that are currently 0.
     // Reporting of these flags should be synchronized with the HIP_ARCH* compile-time defines in hip_runtime.h
@@ -838,6 +843,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
             *pi = prop->regsPerBlock; break;
         case hipDeviceAttributeClockRate:
             *pi = prop->clockRate; break;
+        case hipDeviceAttributeMemoryClockRate:
+            *pi = prop->memoryClockRate; break;
         case hipDeviceAttributeMultiprocessorCount:
             *pi = prop->multiProcessorCount; break;
         case hipDeviceAttributeComputeMode:
@@ -852,7 +859,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
             *pi = prop->minor; break;
         case hipDeviceAttributePciBusId:
             *pi = prop->pciBusID; break;
-        case hipDevAttrConcurrentKernels:
+        case hipDeviceAttributeConcurrentKernels:
             *pi = prop->concurrentKernels; break;
         case hipDeviceAttributePciDeviceId:
             *pi = prop->pciDeviceID; break;
diff --git a/hipamd/tests/src/hipGetDeviceAttribute.cpp b/hipamd/tests/src/hipGetDeviceAttribute.cpp
index 7f37e816d2..33b5e2ba03 100644
--- a/hipamd/tests/src/hipGetDeviceAttribute.cpp
+++ b/hipamd/tests/src/hipGetDeviceAttribute.cpp
@@ -67,13 +67,14 @@ int main(int argc, char *argv[])
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
+    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
-    CHECK(test_hipDeviceGetAttribute(deviceId, hipDevAttrConcurrentKernels, props.concurrentKernels));
+    CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeConcurrentKernels, props.concurrentKernels));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
     CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));