From 50867efa10e0d807ffec39a40b9a1b03833fb3ca Mon Sep 17 00:00:00 2001 From: pensun Date: Fri, 11 Nov 2016 15:09:31 -0600 Subject: [PATCH] Add direct test case for threadfence_system workaround Change-Id: I5b21b590e957c901044741ac94e816cd8b1426f9 --- CMakeLists.txt | 9 -- src/hip_memory.cpp | 86 ++++++++++++------- .../memory/hipMemoryAllocateCoherent.cpp | 61 +++++++++++++ .../hipMemoryAllocateCoherentDriver.cpp | 60 +++++++++++++ 4 files changed, 174 insertions(+), 42 deletions(-) create mode 100644 tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp create mode 100644 tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e5c3d51c6a..ccd390fbe5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,15 +142,6 @@ if(NOT DEFINED COMPILE_HIP_ATP_MARKER) endif() add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) -# Check if we need to force finegrained system memory allocation -if(NOT DEFINED HIP_COHERENT_HOST_ALLOC) - if(NOT DEFINED ENV{HIP_COHERENT_HOST_ALLOC}) - set(HIP_COHERENT_HOST_ALLOC 0) - else() - set(HIP_COHERENT_HOST_ALLOC $ENV{HIP_COHERENT_HOST_ALLOC}) - endif() -endif() -add_to_config(_buildInfo HIP_COHERENT_HOST_ALLOC) ############################# # Build steps diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 2f1eb1e27f..672b9f2ee2 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -129,12 +129,12 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) LockedAccessor_CtxCrit_t crit(ctx->criticalData()); // the peerCnt always stores self so make sure the trace actually peerCnt = crit->peerCnt(); - tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allowed %d other peer(s) access\n", + tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allowed %d other peer(s) access\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); - if (peerCnt > 1) { - + if (peerCnt > 1) { + //printf ("peer self access\n"); - + // TODOD - remove me: for (auto iter = crit->_peers.begin(); iter!=crit->_peers.end(); iter++) { tprintf (DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(), (iter == crit->_peers.begin()) ? " (self)":""); @@ -155,6 +155,20 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) return ihipLogStatus(hip_status); } +void ihipReadSingleEnv(int *var_ptr, const char *var_name1, const char *description) +{ + char * env = getenv(var_name1); + + // Default is set when variable is initialized (at top of this file), so only override if we find + // an environment variable. + if (env) { + long int v = strtol(env, NULL, 0); + *var_ptr = (int) (v); + } + if (HIP_PRINT_ENV) { + printf ("%-30s = %2d : %s\n", var_name1, *var_ptr, description); + } +} hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -173,44 +187,50 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) unsigned trueFlags = flags; if (flags == hipHostMallocDefault) { trueFlags = hipHostMallocMapped | hipHostMallocWriteCombined; - } + } const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped | hipHostMallocWriteCombined; + // Read from environment variable of HIP_COHERENT_HOST_ALLOC + int coherent_alloc=0; + ihipReadSingleEnv(&coherent_alloc, "HIP_COHERENT_HOST_ALLOC", "Flag to force allocate finegrained system memory"); + if (flags & ~supportedFlags) { hip_status = hipErrorInvalidValue; - } else { -#if HIP_COHERENT_HOST_ALLOC - // TODO - let's make this an environment variable - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if(sizeBytes < 1 && (*ptr == NULL)){ - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); - } - tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); -#else - // TODO - am_alloc requires writeable __acc, perhaps could be refactored? - // TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC. + } + else { auto device = ctx->getWriteableDevice(); - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if (*ptr == NULL) { - hip_status = hipErrorMemoryAllocation; - } else { - hc::am_memtracker_update(*ptr, device->_deviceId, flags); - // TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status. - int peerCnt=0; - { - LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - peerCnt = crit->peerCnt(); - if (peerCnt > 1) { - hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); - } + if(coherent_alloc){ + // Force to allocate finedgrained system memory + *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); + if(sizeBytes < 1 && (*ptr == NULL)){ + hip_status = hipErrorMemoryAllocation; + } else { + hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); + } + tprintf(DB_MEM, " %s: finegrained system memory ptr=%p\n", __func__, *ptr); + } + else{ + // TODO - am_alloc requires writeable __acc, perhaps could be refactored? + // TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC. + *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); + if (*ptr == NULL) { + hip_status = hipErrorMemoryAllocation; + } else { + hc::am_memtracker_update(*ptr, device->_deviceId, flags); + // TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status. + int peerCnt=0; + { + LockedAccessor_CtxCrit_t crit(ctx->criticalData()); + peerCnt = crit->peerCnt(); + if (peerCnt > 1) { + hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); + } + } + tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); } - tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); } } -#endif //HIP_COHERENT_HOST_ALLOC } return ihipLogStatus(hip_status); } diff --git a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp new file mode 100644 index 0000000000..6042f538b3 --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp @@ -0,0 +1,61 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * HIT_END + */ + + +#include +#include "hip/hip_runtime.h" + +__global__ void Kernel(hipLaunchParm lp,volatile float* hostRes) +{ + int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + hostRes[tid] = tid + 1; + __threadfence_system(); + // expecting that the data is getting flushed to host here! + // time waster for-loop (sleep) + for (int timeWater = 0; timeWater < 100000000; timeWater++); +} + +int main() +{ + size_t blocks = 2; + volatile float* hostRes; + hipHostMalloc((void**)&hostRes,blocks*sizeof(float),hipHostMallocMapped); + hostRes[0]=0; + hostRes[1]=0; + hipLaunchKernel(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks), 0, 0, hostRes); + int eleCounter = 0; + while (eleCounter < blocks) + { + // blocks until the value changes + while(hostRes[eleCounter] == 0); + printf("%f\n", hostRes[eleCounter]);; + eleCounter++; + } + hipHostFree((void *)hostRes); + return 0; +} + diff --git a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp new file mode 100644 index 0000000000..dc512b41f8 --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the +following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO +EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * RUN: %t + * HIT_END + */ + +#include +#include +#include +#include +#include +#include +#include +#include "hip/hip_runtime.h" +using namespace std; + +string getRes(){ + FILE *in; + char buff[512], buff_2[512]; + string str = "./hipMemoryAllocateCoherent"; + if(!(in = popen(str.c_str(), "r"))){ + exit(1); + } + fgets(buff, sizeof(buff), in); + fgets(buff_2, sizeof(buff_2), in); + string str_buff = buff; + str_buff += buff_2; + pclose(in); + return str_buff; +} + +int main() { + setenv("HIP_COHERENT_HOST_ALLOC","1000,0,1",1); + string output = getRes(); + istringstream buffer(output); + double res1, res2; + buffer >> res1; + buffer >> res2; + if((res2-res1*2)>0.000001) + exit(1); + std::cout << "PASSED" << std::endl; + return 0; +}