From 1c19dbb80709273754dcb5e96e467388eb8af904 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 18 Feb 2016 18:15:01 +0300 Subject: [PATCH] Device property memoryBusWidth implementation. + Device property memoryBusWidth is added to hipDeviceProp_t struct. + Device attribute hipDeviceAttributeMemoryBusWidth is added to hipDeviceAttribute_t enum. + Tests update. --- include/hip_runtime_api.h | 4 +++- include/nvcc_detail/hip_runtime_api.h | 4 +++- samples/1_Utils/hipInfo/hipInfo.cpp | 1 + src/hip_hcc.cpp | 13 +++++++++---- tests/src/hipGetDeviceAttribute.cpp | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/hip_runtime_api.h b/include/hip_runtime_api.h index 5191bc5d54..dcec805be4 100644 --- a/include/hip_runtime_api.h +++ b/include/hip_runtime_api.h @@ -81,7 +81,8 @@ typedef struct hipDeviceProp_t { int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. int maxGridSize[3]; ///< Max grid dimensions (XYZ). int clockRate; ///< Max clock frequency of the multiProcessors in khz. - int memoryClockRate; ///< Max memory clock frequency in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. size_t totalConstMem; ///< Size of shared memory region (in bytes). int major; ///< Major compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. int minor; ///< Minor compute capability. On HCC, this is an approximation and features may differ from CUDA CC. See the arch feature flags for portable ways to query feature caps. @@ -145,6 +146,7 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a thread block. This number is shared by all thread blocks simultaneously resident on a multiprocessor. hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 7a1e9bc6e9..85befff24f 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -243,7 +243,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att case hipDeviceAttributeClockRate: cdattr = cudaDevAttrClockRate; break; case hipDeviceAttributeMemoryClockRate: - cdattr = cudaDevAttrMemoryClockRate:; break; + cdattr = cudaDevAttrMemoryClockRate; break; + case hipDeviceAttributeMemoryBusWidth: + cdattr = cudaDevAttrGlobalMemoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: cdattr = cudaDevAttrMultiProcessorCount; break; case hipDeviceAttributeComputeMode: diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 18d9176a07..9c3d2c1b53 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -80,6 +80,7 @@ void printDeviceProp (int deviceId) cout << setw(w1) << "pciDeviceID: " << props.pciDeviceID << endl; cout << setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << endl; cout << setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor << endl; + cout << setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << endl; cout << setw(w1) << "clockRate: " << (float)props.clockRate / 1000.0 << " Mhz" << endl; cout << setw(w1) << "memoryClockRate: " << (float)props.memoryClockRate / 1000.0 << " Mhz" << endl; cout << setw(w1) << "clockInstructionRate: " << 
(float)props.clockInstructionRate / 1000.0 << " Mhz" << endl; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index beba7c2775..f52aa467f4 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -300,7 +300,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) // Get Max clock frequency - err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,&prop->clockRate); + err = hsa_agent_get_info(_hsa_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &prop->clockRate); prop->clockRate *= 1000.0; // convert Mhz to Khz. DeviceErrorCheck(err); @@ -356,12 +356,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) */ // Get memory properties - - err = hsa_agent_iterate_regions(_hsa_agent,get_region_info,prop); + err = hsa_agent_iterate_regions(_hsa_agent, get_region_info, prop); DeviceErrorCheck(err); // Get the size of the region we are using for Accelerator Memory allocations: - hsa_region_t *am_region = static_cast<hsa_region_t*> (_acc.get_hsa_am_region()); + hsa_region_t *am_region = static_cast<hsa_region_t*>(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. @@ -370,7 +369,11 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) // Get Max memory clock frequency err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); + DeviceErrorCheck(err); prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
+ + // Get global memory bus width in bits + err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); DeviceErrorCheck(err); // Set feature flags - these are all mandatory for HIP on HCC path: @@ -845,6 +848,8 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) *pi = prop->clockRate; break; case hipDeviceAttributeMemoryClockRate: *pi = prop->memoryClockRate; break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop->memoryBusWidth; break; case hipDeviceAttributeMultiprocessorCount: *pi = prop->multiProcessorCount; break; case hipDeviceAttributeComputeMode: diff --git a/tests/src/hipGetDeviceAttribute.cpp b/tests/src/hipGetDeviceAttribute.cpp index 33b5e2ba03..3cd88e3ed7 100644 --- a/tests/src/hipGetDeviceAttribute.cpp +++ b/tests/src/hipGetDeviceAttribute.cpp @@ -68,6 +68,7 @@ int main(int argc, char *argv[]) CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxRegistersPerBlock, props.regsPerBlock)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeClockRate, props.clockRate)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryClockRate, props.memoryClockRate)); + CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMemoryBusWidth, props.memoryBusWidth)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMultiprocessorCount, props.multiProcessorCount)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeComputeMode, props.computeMode)); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeL2CacheSize, props.l2CacheSize));