Device property memoryBusWidth implementation.

+ Device property memoryBusWidth is added to the hipDeviceProp_t struct.
+ Device attribute hipDeviceAttributeMemoryBusWidth is added to the hipDeviceAttribute_t enum.
+ Tests are updated to cover the new property and attribute.
This commit is contained in:
Evgeny Mankov
2016-02-18 18:15:01 +03:00
orang tua 617e7d8a7d
melakukan 1c19dbb807
5 mengubah file dengan 17 tambahan dan 6 penghapusan
+3 -1
Melihat File
@@ -81,7 +81,8 @@ typedef struct hipDeviceProp_t {
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
int memoryClockRate; ///< Max memory clock frequency in khz.
int memoryClockRate; ///< Max global memory clock frequency in khz.
int memoryBusWidth; ///< Global memory bus width in bits.
size_t totalConstMem; ///< Size of constant memory region (in bytes).
int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps.
@@ -145,6 +146,7 @@ typedef enum hipDeviceAttribute_t {
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor.
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
+3 -1
Melihat File
@@ -243,7 +243,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
case hipDeviceAttributeClockRate:
cdattr = cudaDevAttrClockRate; break;
case hipDeviceAttributeMemoryClockRate:
cdattr = cudaDevAttrMemoryClockRate:; break;
cdattr = cudaDevAttrMemoryClockRate; break;
case hipDeviceAttributeMemoryBusWidth:
cdattr = cudaDevAttrGlobalMemoryBusWidth; break;
case hipDeviceAttributeMultiprocessorCount:
cdattr = cudaDevAttrMultiProcessorCount; break;
case hipDeviceAttributeComputeMode:
+1
Melihat File
@@ -80,6 +80,7 @@ void printDeviceProp (int deviceId)
cout << setw(w1) << "pciDeviceID: " << props.pciDeviceID << endl;
cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl;
cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl;
cout << setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << endl;
cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl;
cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl;
cout << setw(w1) << "clockInstructionRate: " << (float)props.clockInstructionRate / 1000.0 << " Mhz" << endl;
+9 -4
Melihat File
@@ -300,7 +300,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
// Get Max clock frequency
err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,&prop->clockRate);
err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &prop->clockRate);
prop->clockRate *= 1000.0; // convert Mhz to Khz.
DeviceErrorCheck(err);
@@ -356,12 +356,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
*/
// Get memory properties
err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop);
err = hsa_agent_iterate_regions(_hsa_agent, get_region_info, prop);
DeviceErrorCheck(err);
// Get the size of the region we are using for Accelerator Memory allocations:
hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region());
hsa_region_t *am_region = static_cast<hsa_region_t*>(_acc.get_hsa_am_region());
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
DeviceErrorCheck(err);
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
@@ -370,7 +369,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
// Get Max memory clock frequency
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
DeviceErrorCheck(err);
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
// Get global memory bus width in bits
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
DeviceErrorCheck(err);
// Set feature flags - these are all mandatory for HIP on HCC path:
@@ -845,6 +848,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*pi = prop->clockRate; break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop->memoryClockRate; break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop->memoryBusWidth; break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop->multiProcessorCount; break;
case hipDeviceAttributeComputeMode:
+1
Melihat File
@@ -68,6 +68,7 @@ int main(int argc, char *argv[])
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryBusWidth, props.memoryBusWidth));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode));
CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));