From 77d71346191d1d55b70cd7bc18cc9fdff1d6fe55 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 19 Jul 2016 13:51:44 -0500 Subject: [PATCH] added fix for signal overflow in kernels Change-Id: Ie0b1f97f69b7d7b34e445f6f120472819be03a0e --- hipamd/src/hip_hcc.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index a80db8fd2e..c70c35f23b 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1085,6 +1085,28 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) } } +// HIP uses only 64 kernels. If the performance decrease, add more +uint32_t kernelCount = 0; +std::vector vCF(64); + +void incKernelCnt(hc::completion_future *cf){ + vCF[kernelCount] = cf; + kernelCount++; +} + +void decKernelCnt(){ + if(kernelCount > 63){ + uint32_t len = kernelCount; + for(uint32_t i =0;iwait(); + } + delete vCF[i]; + vCF[i] = NULL; + kernelCount--; + } + } +} // TODO - data-up to data-down: // Called just before a kernel is launched from hipLaunchKernel. @@ -1114,6 +1136,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ // *av = &stream->_av; lp->av = &stream->_av; lp->cf = new hc::completion_future; + incKernelCnt(lp->cf); // lp->av = static_cast(av); // lp->cf = static_cast(malloc(sizeof(hc::completion_future))); return (stream); @@ -1143,6 +1166,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri // *av = &stream->_av; lp->av = &stream->_av; lp->cf = new hc::completion_future; + incKernelCnt(lp->cf); // lp->av = static_cast(av); // lp->cf = static_cast(malloc(sizeof(hc::completion_future))); return (stream); @@ -1173,6 +1197,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri // *av = &stream->_av; lp->av = &stream->_av; lp->cf = new hc::completion_future; + incKernelCnt(lp->cf); // lp->av = static_cast(av); // lp->cf = static_cast(malloc(sizeof(hc::completion_future))); return (stream); @@ -1203,6 +1228,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g // *av = &stream->_av; lp->av = &stream->_av; lp->cf = new hc::completion_future; + incKernelCnt(lp->cf); // lp->av = static_cast(av); // lp->cf = static_cast(malloc(sizeof(hc::completion_future))); return (stream); @@ -1215,6 +1241,7 @@ void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp) { // stream->lockclose_postKernelCommand(cf); stream->lockclose_postKernelCommand(*lp.cf); + decKernelCnt(); if (HIP_LAUNCH_BLOCKING) { tprintf(DB_SYNC, " stream:%p LAUNCH_BLOCKING for kernel completion\n", stream); }