@@ -1003,7 +1003,7 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act
|
||||
// Mark API
|
||||
PUBLIC_API void roctracer_mark(const char* str) {
|
||||
if (mark_api_callback_ptr) {
|
||||
mark_api_callback_ptr(ACTIVITY_DOMAIN_NUMBER, ACTIVITY_EXT_OP_MARK, str, NULL);
|
||||
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
|
||||
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
ROOT_PATH = ../..
|
||||
LIB_PATH = $(ROOT_PATH)/build
|
||||
ROC_LIBS = -L$(LIB_PATH) -lroctracer64
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
|
||||
HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
@@ -15,15 +20,13 @@ EXECUTABLE=./MatrixTranspose
|
||||
.PHONY: test
|
||||
|
||||
|
||||
all: clean $(EXECUTABLE) test
|
||||
all: clean $(EXECUTABLE)
|
||||
|
||||
CXXFLAGS =-g
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
$(HIPCC) $(OBJECTS) -o $@
|
||||
|
||||
$(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS)
|
||||
|
||||
test: $(EXECUTABLE)
|
||||
$(EXECUTABLE)
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "roctracer_ext.h"
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
@@ -84,10 +85,12 @@ int main() {
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
roctracer_mark("before HIP LaunchKernel");
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
roctracer_mark("after HIP LaunchKernel");
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
ROOT_PATH = ../..
|
||||
LIB_PATH = $(ROOT_PATH)/build
|
||||
ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64
|
||||
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
ITERATIONS ?= 100
|
||||
|
||||
HCC_HOME ?= /opt/rocm/hcc
|
||||
HIP_PATH ?= /opt/rocm/hip
|
||||
HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
CXX = $(HIPCC)
|
||||
CXXFLAGS = -g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS)
|
||||
|
||||
TARGET=hcc
|
||||
|
||||
@@ -18,13 +18,13 @@ OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
EXECUTABLE=./MatrixTranspose
|
||||
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
|
||||
.PHONY: test
|
||||
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
all: clean $(EXECUTABLE)
|
||||
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
$(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS)
|
||||
|
||||
@@ -277,7 +277,7 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
fprintf(hip_api_file_handle, "%s()\n", oss.str().c_str());
|
||||
}
|
||||
} else {
|
||||
fprintf(hip_api_file_handle, "%s(name(%s))\n", oss.str().c_str(), entry->name);
|
||||
fprintf(hip_api_file_handle, "%s(%s)\n", oss.str().c_str(), entry->name);
|
||||
}
|
||||
|
||||
fflush(hip_api_file_handle);
|
||||
|
||||
Ссылка в новой задаче
Block a user