[TransferBench] Adding shared memory per threadblock env var. Defaulting to 1 threadblock per CU (#436)
This commit is contained in:
@@ -1027,7 +1027,7 @@ void RunLink(EnvVars const& ev, size_t const N, int const iteration, Link& link)
|
||||
hipExtLaunchKernelGGL(ev.useMemset ? GpuMemsetKernel : GpuCopyKernel,
|
||||
dim3(link.numBlocksToUse, 1, 1),
|
||||
dim3(BLOCKSIZE, 1, 1),
|
||||
- 0, link.stream,
|
||||
+ ev.sharedMemBytes, link.stream,
|
||||
(ev.combineTiming && recordStart) ? link.startEvent : NULL,
|
||||
(ev.combineTiming && recordStop) ? link.stopEvent : NULL,
|
||||
0, link.blockParam);
|
||||
|
||||
مرجع در شماره جدید
Block a user