update for gfx9 and 3.1
Change-Id: I0c218f8a2dcdad1c8deb44770d8a64ccd95a92fb
[ROCm/rocprofiler commit: 299a08e4be]
このコミットが含まれているのは:
@@ -70,6 +70,30 @@ set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
|
||||
set ( LIB_DIR "${ROOT_DIR}/src" )
|
||||
set ( TEST_DIR "${ROOT_DIR}/test" )
|
||||
|
||||
## Enable tracing API
# NOTE(review): any false value of USE_PROF_API (including an explicit
# -DUSE_PROF_API=0) is overwritten with 1 here, so the option cannot be
# switched off from the command line - confirm this is intended.
if (NOT USE_PROF_API)
  set(USE_PROF_API 1)
endif()

# Protocol header lookup
# Locate the directory containing prof_protocol.h. A user-supplied
# PROF_API_HEADER_PATH is used as a hint, with /opt/rocm/roctracer as a
# fallback search path; "include/ext" is tried as a sub-directory suffix.
if(USE_PROF_API EQUAL 1)
  find_path(PROF_API_HEADER_DIR prof_protocol.h
    HINTS
      ${PROF_API_HEADER_PATH}
    PATHS
      /opt/rocm/roctracer
    PATH_SUFFIXES
      include/ext
  )
  if(NOT PROF_API_HEADER_DIR)
    # "ERROR" is not a message() mode keyword; CMake would print it as part of
    # the text at the default level. The build continues with tracer
    # integration disabled, so WARNING is the matching mode.
    MESSAGE(WARNING "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
  else()
    # Header found: expose it to the sources and enable the tracing code paths.
    add_definitions(-DUSE_PROF_API=1)
    include_directories(${PROF_API_HEADER_DIR})
    MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
  endif()
endif()
|
||||
|
||||
## Build library
|
||||
include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ target_link_libraries ( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} $
|
||||
|
||||
## Building ctrl test executable
|
||||
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_LIB_PATH}/../include )
|
||||
target_link_libraries ( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/tool/*.xml ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
@@ -228,7 +228,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
|
||||
unsigned metrics_input(rocprofiler_feature_t** ret) {
|
||||
// Profiling feature objects
|
||||
const unsigned feature_count = 9;
|
||||
const unsigned feature_count = 6;
|
||||
rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count];
|
||||
memset(features, 0, feature_count * sizeof(rocprofiler_feature_t));
|
||||
|
||||
@@ -245,12 +245,12 @@ unsigned metrics_input(rocprofiler_feature_t** ret) {
|
||||
features[4].name = "SQ_INSTS_VALU";
|
||||
features[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[5].name = "VALUInsts";
|
||||
features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[6].name = "TCC_HIT_sum";
|
||||
features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[7].name = "TCC_MISS_sum";
|
||||
features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[8].name = "WRITE_SIZE";
|
||||
// features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// features[6].name = "TCC_HIT_sum";
|
||||
// features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// features[7].name = "TCC_MISS_sum";
|
||||
// features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// features[8].name = "WRITE_SIZE";
|
||||
|
||||
*ret = features;
|
||||
return feature_count;
|
||||
|
||||
@@ -82,7 +82,7 @@ int main() {
|
||||
rocprofiler_properties_t properties;
|
||||
|
||||
// Profiling feature objects
|
||||
const unsigned feature_count = 9;
|
||||
const unsigned feature_count = 6;
|
||||
rocprofiler_feature_t feature[feature_count];
|
||||
// PMC events
|
||||
memset(feature, 0, sizeof(feature));
|
||||
@@ -98,12 +98,12 @@ int main() {
|
||||
feature[4].name = "SQ_INSTS_VALU";
|
||||
feature[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[5].name = "VALUInsts";
|
||||
feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[6].name = "TCC_HIT_sum";
|
||||
feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[7].name = "TCC_MISS_sum";
|
||||
feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
feature[8].name = "WRITE_SIZE";
|
||||
// feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[6].name = "TCC_HIT_sum";
|
||||
// feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[7].name = "TCC_MISS_sum";
|
||||
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[8].name = "WRITE_SIZE";
|
||||
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
// feature[8].name = "TCC_EA_WRREQ_sum";
|
||||
// feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
|
||||
@@ -20,9 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include <dirent.h>
|
||||
#include <hsa.h>
|
||||
#include <hsakmt.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
@@ -31,6 +35,64 @@ THE SOFTWARE.
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
int get_gpu_node_id() {
|
||||
int gpu_node = - 1;
|
||||
|
||||
#if 0
|
||||
// find a valid gpu node from /sys/class/kfd/kfd/topology/nodes
|
||||
std::string path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
DIR *dir;
|
||||
struct dirent *ent;
|
||||
|
||||
if ((dir = opendir(path.c_str())) != NULL) {
|
||||
while ((ent = readdir(dir)) != NULL) {
|
||||
|
||||
std::string dir = ent->d_name;
|
||||
|
||||
if (dir.find_first_not_of("0123456789") == std::string::npos) {
|
||||
std::string file = path + "/" + ent->d_name + "/gpu_id";
|
||||
std::ifstream infile(file);
|
||||
int id;
|
||||
|
||||
infile >> id;
|
||||
if (id != 0) {
|
||||
gpu_node = atoi(ent->d_name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir (dir);
|
||||
}
|
||||
#else
|
||||
HsaSystemProperties m_SystemProperties;
|
||||
memset(&m_SystemProperties, 0, sizeof(m_SystemProperties));
|
||||
|
||||
HSAKMT_STATUS status = hsaKmtAcquireSystemProperties(&m_SystemProperties);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
std::cerr << "Error in hsaKmtAcquireSystemProperties"<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// tranverse all CPU and GPU nodes and break when a GPU node is found
|
||||
for (unsigned i = 0; i < m_SystemProperties.NumNodes; ++i) {
|
||||
HsaNodeProperties nodeProperties;
|
||||
memset(&nodeProperties, 0, sizeof(HsaNodeProperties));
|
||||
|
||||
status = hsaKmtGetNodeProperties(i, &nodeProperties);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
std::cerr << "Error in hsaKmtAcquireSystemProperties"<< std::endl;
|
||||
break;
|
||||
} else if(nodeProperties.NumFComputeCores) {
|
||||
gpu_node = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
printf ("GPU node id(%d)\n", gpu_node);
|
||||
return gpu_node;
|
||||
}
|
||||
|
||||
void thread_fun(const int kiter, const int diter, const uint32_t agents_number) {
|
||||
const AgentInfo* agent_info[agents_number];
|
||||
hsa_queue_t* queue[agents_number];
|
||||
@@ -65,12 +127,31 @@ int main(int argc, char** argv) {
|
||||
const char* diter_s = getenv("ROCP_DITER");
|
||||
const char* agents_s = getenv("ROCP_AGENTS");
|
||||
const char* thrs_s = getenv("ROCP_THRS");
|
||||
const char* spm_enabled = getenv("ROCP_SPM");
|
||||
int gpu_node_id = -1;
|
||||
|
||||
const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
|
||||
const int diter = (diter_s != NULL) ? atol(diter_s) : 1;
|
||||
const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1;
|
||||
const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1;
|
||||
|
||||
if (spm_enabled != NULL) {
|
||||
if (hsa_init() != HSA_STATUS_SUCCESS) {
|
||||
std::cerr << "Error in hsa_init()" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
gpu_node_id = get_gpu_node_id();
|
||||
if (gpu_node_id == -1) {
|
||||
std::cerr << "Error in get_gpu_node_id()" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
HSAKMT_STATUS status = hsaKmtEnableDebugTrap(gpu_node_id, INVALID_QUEUEID);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
std::cerr << "Error in enabling debug trap" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
TestHsa::HsaInstantiate();
|
||||
|
||||
std::vector<std::thread> t(thrs);
|
||||
@@ -81,6 +162,18 @@ int main(int argc, char** argv) {
|
||||
t[n].join();
|
||||
}
|
||||
|
||||
if (spm_enabled != NULL) {
|
||||
if (gpu_node_id == -1) {
|
||||
std::cerr << "Invalid GPU node id" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
HSAKMT_STATUS status = hsaKmtDisableDebugTrap(gpu_node_id);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
std::cerr << "Error in disabling debug" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
TestHsa::HsaShutdown();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -116,12 +116,14 @@ export ROCP_INPUT=input1.xml
|
||||
eval_test "'rocprof' libtool test n-threads" ./test/ctrl
|
||||
|
||||
## SPM test
|
||||
export ROCP_KITER=3
|
||||
export ROCP_DITER=3
|
||||
export ROCP_AGENTS=1
|
||||
export ROCP_THRS=1
|
||||
export ROCP_INPUT=spm_input.xml
|
||||
eval_test "libtool test, SPM trace test" ./test/ctrl
|
||||
# export ROCP_KITER=3
|
||||
# export ROCP_DITER=3
|
||||
# export ROCP_AGENTS=1
|
||||
# export ROCP_THRS=1
|
||||
# export ROCP_INPUT=spm_input.xml
|
||||
# export ROCP_SPM=1
|
||||
# eval_test "libtool test, SPM trace test" ./test/ctrl
|
||||
# unset ROCP_SPM
|
||||
|
||||
## Libtool test, counter sets
|
||||
# Memcopies tracking
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
# List of metrics
|
||||
<metric
|
||||
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
|
||||
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts
|
||||
></metric>
|
||||
|
||||
# SQTT trace with parameters
|
||||
|
||||
@@ -10,5 +10,5 @@
|
||||
|
||||
# List of metrics
|
||||
<metric
|
||||
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
|
||||
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts
|
||||
></metric>
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする