Device property memoryClockRate implementation.
+ Device property memoryClockRate is added to hipDeviceProp_t struct. + Device attribute hipDeviceAttributeMemoryClockRate is added to hipDeviceAttribute_t struct. + Tests update. + Rename hipDevAttrConcurrentKernels to hipDeviceAttributeConcurrentKernels.
此提交包含在:
@@ -80,7 +80,8 @@ typedef struct hipDeviceProp_t {
|
||||
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
||||
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
||||
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
||||
int clockRate; ///< Max clock frequency of the multiProcessors, in khz.
|
||||
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
||||
int memoryClockRate; ///< Max memory clock frequency in khz.
|
||||
size_t totalConstMem; ///< Size of shared memory region (in bytes).
|
||||
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||
@@ -143,13 +144,14 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeWarpSize, ///< Warp size in threads.
|
||||
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
|
||||
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
|
||||
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
|
||||
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
|
||||
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
|
||||
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
||||
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
|
||||
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
|
||||
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
|
||||
hipDevAttrConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
|
||||
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently.
|
||||
hipDeviceAttributePciBusId, ///< PCI Bus ID.
|
||||
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
|
||||
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per Multiprocessor.
|
||||
|
||||
@@ -242,6 +242,8 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
cdattr = cudaDevAttrMaxRegistersPerBlock; break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
cdattr = cudaDevAttrClockRate; break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
cdattr = cudaDevAttrMemoryClockRate:; break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
cdattr = cudaDevAttrMultiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
@@ -252,7 +254,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
cdattr = cudaDevAttrComputeCapabilityMajor; break;
|
||||
case hipDevAttrConcurrentKernels:
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
cdattr = cudaDevAttrConcurrentKernels; break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
cdattr = cudaDevAttrPciBusId; break;
|
||||
|
||||
@@ -81,7 +81,8 @@ void printDeviceProp (int deviceId)
|
||||
cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
|
||||
cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
|
||||
cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
|
||||
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0<< " Mhz" << endl;
|
||||
cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
|
||||
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
|
||||
cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl;
|
||||
cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl;
|
||||
cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl;
|
||||
|
||||
+9
-2
@@ -362,12 +362,17 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
|
||||
// Get the size of the region we are using for Accelerator Memory allocations:
|
||||
hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
|
||||
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &(prop->totalGlobalMem));
|
||||
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
|
||||
DeviceErrorCheck(err);
|
||||
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
|
||||
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
|
||||
|
||||
// Get Max memory clock frequency
|
||||
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
|
||||
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Set feature flags - these are all mandatory for HIP on HCC path:
|
||||
// Some features are under-development and future revs may support flags that are currently 0.
|
||||
// Reporting of these flags should be synchronized with the HIP_ARCH* compile-time defines in hip_runtime.h
|
||||
@@ -838,6 +843,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
*pi = prop->regsPerBlock; break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop->clockRate; break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop->memoryClockRate; break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop->multiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
@@ -852,7 +859,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
*pi = prop->minor; break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop->pciBusID; break;
|
||||
case hipDevAttrConcurrentKernels:
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop->concurrentKernels; break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop->pciDeviceID; break;
|
||||
|
||||
@@ -67,13 +67,14 @@ int main(int argc, char *argv[])
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeWarpSize, props.warpSize));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerMultiProcessor, props.maxThreadsPerMultiProcessor));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMajor, props.major));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeCapabilityMinor, props.minor));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDevAttrConcurrentKernels, props.concurrentKernels));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeConcurrentKernels, props.concurrentKernels));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciBusId, props.pciBusID));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributePciDeviceId, props.pciDeviceID));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, props.maxSharedMemoryPerMultiProcessor));
|
||||
|
||||
新增問題並參考
封鎖使用者