Add support for extended launch syntax. (#1530)
* Add support for extended launch syntax.
* Add unit test.
* Fix typo
* hipExtLaunchKernelGGL lives in hip_ext.h
Change-Id: Ice32dab0d43475fda65c6a910c11416871a8f2ff
* [dtest] remove redundant include from hipModuleGetGlobal dtest
[ROCm/hip commit: 69e74c3e96]
This commit is contained in:
zatwierdzone przez
Rahul Garg
rodzic
5536940e12
commit
3ea404a8ba
@@ -176,5 +176,4 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
numBlocks, dimBlocks, sharedMemBytes,
|
||||
stream, &config[0]);
|
||||
}
|
||||
|
||||
#pragma GCC visibility pop
|
||||
|
||||
@@ -2885,6 +2885,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne
|
||||
unsigned int sharedMemBytes, hipStream_t stream,
|
||||
void** kernelParams, void** extra);
|
||||
|
||||
|
||||
/**
|
||||
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
|
||||
* to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute
|
||||
|
||||
@@ -60,12 +60,11 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a
|
||||
|
||||
#endif // #ifdef __HCC__
|
||||
|
||||
|
||||
/**
|
||||
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
|
||||
to kernelparams or extra
|
||||
*
|
||||
* @param [in[ f Kernel to launch.
|
||||
* @param [in[ f Kernel to launch.
|
||||
* @param [in] gridDimX X grid dimension specified in work-items
|
||||
* @param [in] gridDimY Y grid dimension specified in work-items
|
||||
* @param [in] gridDimZ Z grid dimension specified in work-items
|
||||
@@ -88,7 +87,6 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a
|
||||
*
|
||||
* @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please
|
||||
refer to hip_porting_driver_api.md for sample usage.
|
||||
|
||||
* HIP/ROCm actually updates the start event when the associated kernel completes.
|
||||
*/
|
||||
HIP_PUBLIC_API
|
||||
@@ -111,8 +109,62 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipEvent_t stopEvent = nullptr)
|
||||
__attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace hip_impl {
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
void hipExtLaunchKernelGGLImpl(
|
||||
std::uintptr_t function_address,
|
||||
const dim3& numBlocks,
|
||||
const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
std::uint32_t flags,
|
||||
void** kernarg) {
|
||||
|
||||
const auto& kd = hip_impl::get_program_state()
|
||||
.kernel_descriptor(function_address, target_agent(stream));
|
||||
|
||||
hipExtModuleLaunchKernel(kd, numBlocks.x * dimBlocks.x,
|
||||
numBlocks.y * dimBlocks.y,
|
||||
numBlocks.z * dimBlocks.z,
|
||||
dimBlocks.x, dimBlocks.y, dimBlocks.z,
|
||||
sharedMemBytes, stream, nullptr, kernarg,
|
||||
startEvent, stopEvent, flags);
|
||||
}
|
||||
} // namespace hip_impl
|
||||
|
||||
template <typename... Args, typename F = void (*)(Args...)>
|
||||
inline
|
||||
void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks,
|
||||
const dim3& dimBlocks, std::uint32_t sharedMemBytes,
|
||||
hipStream_t stream, hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent, std::uint32_t flags,
|
||||
Args... args) {
|
||||
hip_impl::hip_init();
|
||||
auto kernarg =
|
||||
hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
|
||||
std::size_t kernarg_size = kernarg.size();
|
||||
|
||||
void* config[]{
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
kernarg.data(),
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE,
|
||||
&kernarg_size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
hip_impl::hipExtLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
|
||||
numBlocks, dimBlocks, sharedMemBytes,
|
||||
stream, startEvent, stopEvent, flags,
|
||||
&config[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
// doxygen end AMD-specific features
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif // #ifdef HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
#endif // #iidef HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_ext.h"
|
||||
#include "test_common.h"
|
||||
|
||||
void test(size_t N) {
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
|
||||
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
|
||||
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
test(N);
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -32,7 +32,6 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <hip/hip_ext.h>
|
||||
|
||||
#define LEN 64
|
||||
#define SIZE LEN * sizeof(float)
|
||||
|
||||
Reference in New Issue
Block a user