Device property memoryBusWidth implementation.
+ Device property memoryBusWidth is added to the hipDeviceProp_t struct. + Device attribute hipDeviceAttributeMemoryBusWidth is added to the hipDeviceAttribute_t enum. + Tests updated.
This commit is contained in:
@@ -81,7 +81,8 @@ typedef struct hipDeviceProp_t {
|
||||
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
||||
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
||||
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
||||
int memoryClockRate; ///< Max memory clock frequency in khz.
|
||||
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
||||
int memoryBusWidth; ///< Global memory bus width in bits.
|
||||
size_t totalConstMem; ///< Size of constant memory region (in bytes).
|
||||
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
|
||||
@@ -145,6 +146,7 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
|
||||
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
|
||||
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
|
||||
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
|
||||
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
|
||||
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
|
||||
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
|
||||
|
||||
@@ -243,7 +243,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
case hipDeviceAttributeClockRate:
|
||||
cdattr = cudaDevAttrClockRate; break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
cdattr = cudaDevAttrMemoryClockRate:; break;
|
||||
cdattr = cudaDevAttrMemoryClockRate; break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
cdattr = cudaDevAttrGlobalMemoryBusWidth; break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
cdattr = cudaDevAttrMultiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
|
||||
@@ -80,6 +80,7 @@ void printDeviceProp (int deviceId)
|
||||
cout << setw(w1) << "pciDeviceID: " << props.pciDeviceID << endl;
|
||||
cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
|
||||
cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
|
||||
cout << setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << endl;
|
||||
cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
|
||||
cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
|
||||
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
|
||||
|
||||
+9
-4
@@ -300,7 +300,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
|
||||
|
||||
// Get Max clock frequency
|
||||
err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,&prop->clockRate);
|
||||
err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &prop->clockRate);
|
||||
prop->clockRate *= 1000.0; // convert Mhz to Khz.
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
@@ -356,12 +356,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
*/
|
||||
|
||||
// Get memory properties
|
||||
|
||||
err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop);
|
||||
err = hsa_agent_iterate_regions(_hsa_agent, get_region_info, prop);
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Get the size of the region we are using for Accelerator Memory allocations:
|
||||
hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
|
||||
hsa_region_t *am_region = static_cast<hsa_region_t*>(_acc.get_hsa_am_region());
|
||||
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
|
||||
DeviceErrorCheck(err);
|
||||
// maxSharedMemoryPerMultiProcessor should be the same as the group memory size.
|
||||
@@ -370,7 +369,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
|
||||
// Get Max memory clock frequency
|
||||
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
|
||||
DeviceErrorCheck(err);
|
||||
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
|
||||
|
||||
// Get global memory bus width in bits
|
||||
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Set feature flags - these are all mandatory for HIP on HCC path:
|
||||
@@ -845,6 +848,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
*pi = prop->clockRate; break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop->memoryClockRate; break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop->memoryBusWidth; break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop->multiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
|
||||
@@ -68,6 +68,7 @@ int main(int argc, char *argv[])
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryBusWidth, props.memoryBusWidth));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
|
||||
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));
|
||||
|
||||
Reference in New Issue
Block a user