SWDEV-415259: Replacing HCC with AMD
__HIP_PLATFORM_HCC__ with __HIP_PLATFORM_AMD__
Change-Id: I2e79cff5a0216125b4ce2aa90104d4a1696bad08
[ROCm/roctracer commit: 67b3717da9]
This commit is contained in:
committed by
Tao Sang
vanhempi
5c6492d1ec
commit
2b78928d60
@@ -145,7 +145,7 @@ Tracing control API:
|
||||
### 3.2. Tracing Domains
|
||||
```
|
||||
Various tracing domains are supported. Each domain is assigned
|
||||
a domain ID. The domains include HSA, HIP, and HCC runtime levels.
|
||||
a domain ID. The domains include the HSA and HIP runtime levels.
|
||||
|
||||
Traced API domains:
|
||||
typedef enum {
|
||||
@@ -277,11 +277,11 @@ typedef struct {
|
||||
uint32_t mode; // roctracer mode
|
||||
size_t buffer_size; // buffer size
|
||||
// power of 2
|
||||
roctracer_allocator_t alloc_fun; // memory allocator
|
||||
roctracer_allocator_t alloc_fun; // memory allocator
|
||||
// function pointer
|
||||
void* alloc_arg; // memory allocator
|
||||
// function pointer
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
|
||||
// callback function
|
||||
void* buffer_callback_arg; // tracer record
|
||||
// callback arg
|
||||
@@ -296,12 +296,12 @@ roctracer_status_t roctracer_open_pool(
|
||||
|
||||
roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if
|
||||
// not NULL, otherwise sets the
|
||||
// default one if it is not set
|
||||
// yet; otherwise the error is
|
||||
// yet; otherwise the error is
|
||||
// generated
|
||||
|
||||
|
||||
Close tracer memory pool:
|
||||
roctracer_status_t roctracer_close_pool();
|
||||
|
||||
@@ -374,7 +374,7 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(
|
||||
|
||||
Notifies that the calling thread is leaving an external API region.
|
||||
Pop an external correlation id for the calling thread.
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(
|
||||
activity_correlation_id_t* last_id); // returns the last external correlation id
|
||||
// if not NULL
|
||||
```
|
||||
@@ -387,7 +387,7 @@ Tracing stop:
|
||||
void roctracer_stop();
|
||||
```
|
||||
## 4. rocTracer Usage Code Examples
|
||||
### 4.1. HIP API and HCC ops, GPU Activity Tracing
|
||||
### 4.1. HIP API and GPU Activity Tracing
|
||||
```
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
@@ -399,7 +399,7 @@ void hip_api_callback(
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
|
||||
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid),
|
||||
@@ -411,15 +411,15 @@ void hip_api_callback(
|
||||
|
||||
// Activity tracing callback
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast<const
|
||||
const roctracer_record_t* record = reinterpret_cast<const
|
||||
roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const
|
||||
roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n");
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)
|
||||
device_id(%d) stream_id(%lu)\n",
|
||||
name,
|
||||
record->correlation_id,
|
||||
@@ -439,8 +439,8 @@ int main() {
|
||||
properties.buffer_size = 12;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
|
||||
// Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
|
||||
|
||||
// Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
|
||||
// API calls.
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API,
|
||||
HIP_API_ID_hipModuleLaunchKernel,
|
||||
@@ -505,7 +505,7 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
// hipLaunchParm provides the execution configuration
|
||||
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
|
||||
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
|
||||
const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
@@ -514,7 +514,7 @@ __global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
|
||||
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned
|
||||
int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
@@ -549,34 +549,34 @@ int main() {
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
|
||||
hipMemcpyHostToDevice);
|
||||
|
||||
|
||||
// Launching kernel from host
|
||||
hipLaunchKernel(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
|
||||
hipLaunchKernel(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
|
||||
THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
|
||||
hipMemcpyDeviceToHost);
|
||||
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
@@ -590,11 +590,11 @@ int main() {
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
@@ -607,7 +607,7 @@ int main() {
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// HIP/HCC Callbacks/Activity tracing
|
||||
// HIP Callbacks/Activity tracing
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
@@ -629,7 +629,7 @@ void hip_api_callback(
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
@@ -651,7 +651,7 @@ void hip_api_callback(
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
fprintf(stdout, "ptr(%p),
|
||||
fprintf(stdout, "ptr(%p),
|
||||
data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
@@ -676,18 +676,18 @@ void hip_api_callback(
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1):
|
||||
// hipMalloc id(3) correlation_id(1):
|
||||
// begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast
|
||||
const roctracer_record_t* record = reinterpret_cast
|
||||
<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast
|
||||
<const roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \
|
||||
device_id(%d) stream_id(%lu)",
|
||||
name,
|
||||
record->correlation_id,
|
||||
@@ -696,7 +696,7 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
record->device_id,
|
||||
record->stream_id
|
||||
);
|
||||
if (record->kind == hc::HSA_OP_ID_COPY)
|
||||
if (record->kind == hc::HSA_OP_ID_COPY)
|
||||
fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
fprintf(stdout, "\n");
|
||||
fflush(stdout);
|
||||
@@ -724,7 +724,7 @@ void stop_tracing() {
|
||||
ROCTRACER_CALL(roctracer_disable_api_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_api_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
std::cout << "# STOP #############################" << std::endl
|
||||
std::cout << "# STOP #############################" << std::endl
|
||||
<< std::flush;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -38,7 +38,7 @@ set_target_properties(file_plugin PROPERTIES
|
||||
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
|
||||
|
||||
target_compile_definitions(file_plugin
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_include_directories(file_plugin PRIVATE ${PROJECT_SOURCE_DIR}/inc)
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
add_custom_command(
|
||||
OUTPUT hip_ostream_ops.h
|
||||
COMMAND ${CMAKE_C_COMPILER} "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
|
||||
-E "${HIP_RUNTIME_API_H}" -D__HIP_PLATFORM_HCC__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
|
||||
-E "${HIP_RUNTIME_API_H}" -D__HIP_PLATFORM_AMD__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
|
||||
BYPRODUCTS hip_runtime_api.h.i
|
||||
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
|
||||
-in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
|
||||
@@ -222,7 +222,7 @@ set_target_properties(roctracer_tool PROPERTIES
|
||||
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
|
||||
|
||||
target_compile_definitions(roctracer_tool
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_include_directories(roctracer_tool
|
||||
PRIVATE
|
||||
|
||||
@@ -71,7 +71,7 @@ add_custom_command(OUTPUT MatrixTranspose.c
|
||||
hip_add_executable(MatrixTranspose_ctest MatrixTranspose.c)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(MatrixTranspose_ctest PRIVATE "-Wl,--build-id=md5")
|
||||
target_compile_definitions(MatrixTranspose_ctest PRIVATE HIP_TEST=0 __HIP_PLATFORM_HCC__)
|
||||
target_compile_definitions(MatrixTranspose_ctest PRIVATE HIP_TEST=0 __HIP_PLATFORM_AMD__)
|
||||
target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose_ctest)
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user