diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index e17f19675a..14faa7671b 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -56,6 +56,7 @@ void printCompilerInfo() { #endif } +double bytesToKB(size_t s) { return (double)s / (1024.0); } double bytesToGB(size_t s) { return (double)s / (1024.0 * 1024.0 * 1024.0); } #define printLimit(w1, limit, units) \ @@ -97,7 +98,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) - << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl; + << bytesToKB(props.maxSharedMemoryPerMultiProcessor) << " KB" << endl; cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl; cout << setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 5fb7c53260..be08430bc3 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -677,7 +677,7 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &(p_prop->sharedMemPerBlock)); + &(p_prop->maxSharedMemoryPerMultiProcessor)); break; default: break; @@ -835,10 +835,8 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { hsa_region_t* am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); - // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory - // size, and also equal to the group pool size. - prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; + // Current GPUs allow a workgroup to use all of LDS in a CU, so these two are equal. + prop->sharedMemPerBlock = prop->maxSharedMemoryPerMultiProcessor; // Get Max memory clock frequency err =