update for gfx9 and 3.1

Change-Id: I0c218f8a2dcdad1c8deb44770d8a64ccd95a92fb


[ROCm/rocprofiler commit: 299a08e4be]
このコミットが含まれているのは:
Evgeny
2020-03-04 18:35:18 -06:00
コミット 0e767e9864
8個のファイルの変更142行の追加23行の削除
+24
ファイルの表示
@@ -70,6 +70,30 @@ set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
set ( LIB_DIR "${ROOT_DIR}/src" )
set ( TEST_DIR "${ROOT_DIR}/test" )
## Enable tracing API
# Default the option to enabled only when the caller has not supplied a value,
# so that an explicit -DUSE_PROF_API=0 is honoured instead of being overwritten.
if (NOT DEFINED USE_PROF_API)
  set(USE_PROF_API 1)
endif()

# Protocol header lookup
# Searches PROF_API_HEADER_PATH first (as a hint), then /opt/rocm/roctracer,
# trying the include/ext suffix under each candidate location.
if(USE_PROF_API EQUAL 1)
  find_path(PROF_API_HEADER_DIR prof_protocol.h
    HINTS
      ${PROF_API_HEADER_PATH}
    PATHS
      /opt/rocm/roctracer
    PATH_SUFFIXES
      include/ext
  )
  if(NOT PROF_API_HEADER_DIR)
    # WARNING is a valid message mode: it reports the problem and lets the
    # configuration continue with the tracer integration left disabled.
    MESSAGE(WARNING "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
  else()
    add_definitions(-DUSE_PROF_API=1)
    include_directories(${PROF_API_HEADER_DIR})
    MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
  endif()
endif()
## Build library
include ( ${LIB_DIR}/CMakeLists.txt )
+1 -1
ファイルの表示
@@ -83,7 +83,7 @@ target_link_libraries ( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} $
## Building ctrl test executable
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_LIB_PATH}/../include )
target_link_libraries ( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/tool/*.xml ${PROJECT_BINARY_DIR}" )
+7 -7
ファイルの表示
@@ -228,7 +228,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
unsigned metrics_input(rocprofiler_feature_t** ret) {
// Profiling feature objects
const unsigned feature_count = 9;
const unsigned feature_count = 6;
rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count];
memset(features, 0, feature_count * sizeof(rocprofiler_feature_t));
@@ -245,12 +245,12 @@ unsigned metrics_input(rocprofiler_feature_t** ret) {
features[4].name = "SQ_INSTS_VALU";
features[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
features[5].name = "VALUInsts";
features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
features[6].name = "TCC_HIT_sum";
features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
features[7].name = "TCC_MISS_sum";
features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
features[8].name = "WRITE_SIZE";
// features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// features[6].name = "TCC_HIT_sum";
// features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// features[7].name = "TCC_MISS_sum";
// features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// features[8].name = "WRITE_SIZE";
*ret = features;
return feature_count;
+7 -7
ファイルの表示
@@ -82,7 +82,7 @@ int main() {
rocprofiler_properties_t properties;
// Profiling feature objects
const unsigned feature_count = 9;
const unsigned feature_count = 6;
rocprofiler_feature_t feature[feature_count];
// PMC events
memset(feature, 0, sizeof(feature));
@@ -98,12 +98,12 @@ int main() {
feature[4].name = "SQ_INSTS_VALU";
feature[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[5].name = "VALUInsts";
feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[6].name = "TCC_HIT_sum";
feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[7].name = "TCC_MISS_sum";
feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[8].name = "WRITE_SIZE";
// feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[6].name = "TCC_HIT_sum";
// feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[7].name = "TCC_MISS_sum";
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[8].name = "WRITE_SIZE";
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[8].name = "TCC_EA_WRREQ_sum";
// feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC;
+93
ファイルの表示
@@ -20,9 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include <dirent.h>
#include <hsa.h>
#include <hsakmt.h>
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <thread>
@@ -31,6 +35,64 @@ THE SOFTWARE.
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
/**
 * Find the index of the first GPU node reported by the KFD thunk.
 *
 * Acquires the system properties, then walks the nodes in index order and
 * picks the first one whose NumFComputeCores is non-zero.
 *
 * @return the node index on success, or -1 when the system properties cannot
 *         be acquired, a node query fails, or no GPU node is found. Callers
 *         compare the result against -1 to detect failure.
 */
int get_gpu_node_id() {
  int gpu_node = -1;

#if 0
  // Disabled alternative: find a valid gpu node from
  // /sys/class/kfd/kfd/topology/nodes
  std::string path = "/sys/class/kfd/kfd/topology/nodes";
  DIR *dir;
  struct dirent *ent;

  if ((dir = opendir(path.c_str())) != NULL) {
    while ((ent = readdir(dir)) != NULL) {
      std::string entry_name = ent->d_name;
      // Only purely numeric directory names are node entries.
      if (entry_name.find_first_not_of("0123456789") == std::string::npos) {
        std::string file = path + "/" + ent->d_name + "/gpu_id";
        std::ifstream infile(file);
        int id;
        infile >> id;
        if (id != 0) {
          gpu_node = atoi(ent->d_name);
          break;
        }
      }
    }
    closedir (dir);
  }
#else
  HsaSystemProperties m_SystemProperties;
  memset(&m_SystemProperties, 0, sizeof(m_SystemProperties));

  HSAKMT_STATUS status = hsaKmtAcquireSystemProperties(&m_SystemProperties);
  if (status != HSAKMT_STATUS_SUCCESS) {
    std::cerr << "Error in hsaKmtAcquireSystemProperties" << std::endl;
    // Report failure with the -1 sentinel; returning any other value would be
    // mistaken for a valid node index by the caller.
    return -1;
  }

  // Traverse all CPU and GPU nodes and stop at the first GPU node found.
  for (unsigned i = 0; i < m_SystemProperties.NumNodes; ++i) {
    HsaNodeProperties nodeProperties;
    memset(&nodeProperties, 0, sizeof(HsaNodeProperties));

    status = hsaKmtGetNodeProperties(i, &nodeProperties);
    if (status != HSAKMT_STATUS_SUCCESS) {
      std::cerr << "Error in hsaKmtGetNodeProperties" << std::endl;
      break;
    } else if (nodeProperties.NumFComputeCores) {
      gpu_node = static_cast<int>(i);
      break;
    }
  }
#endif

  printf ("GPU node id(%d)\n", gpu_node);
  return gpu_node;
}
void thread_fun(const int kiter, const int diter, const uint32_t agents_number) {
const AgentInfo* agent_info[agents_number];
hsa_queue_t* queue[agents_number];
@@ -65,12 +127,31 @@ int main(int argc, char** argv) {
const char* diter_s = getenv("ROCP_DITER");
const char* agents_s = getenv("ROCP_AGENTS");
const char* thrs_s = getenv("ROCP_THRS");
const char* spm_enabled = getenv("ROCP_SPM");
int gpu_node_id = -1;
const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
const int diter = (diter_s != NULL) ? atol(diter_s) : 1;
const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1;
const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1;
if (spm_enabled != NULL) {
if (hsa_init() != HSA_STATUS_SUCCESS) {
std::cerr << "Error in hsa_init()" << std::endl;
return 1;
}
gpu_node_id = get_gpu_node_id();
if (gpu_node_id == -1) {
std::cerr << "Error in get_gpu_node_id()" << std::endl;
return 1;
}
HSAKMT_STATUS status = hsaKmtEnableDebugTrap(gpu_node_id, INVALID_QUEUEID);
if (status != HSAKMT_STATUS_SUCCESS) {
std::cerr << "Error in enabling debug trap" << std::endl;
return 1;
}
}
TestHsa::HsaInstantiate();
std::vector<std::thread> t(thrs);
@@ -81,6 +162,18 @@ int main(int argc, char** argv) {
t[n].join();
}
if (spm_enabled != NULL) {
if (gpu_node_id == -1) {
std::cerr << "Invalid GPU node id" << std::endl;
return 1;
}
HSAKMT_STATUS status = hsaKmtDisableDebugTrap(gpu_node_id);
if (status != HSAKMT_STATUS_SUCCESS) {
std::cerr << "Error in disabling debug" << std::endl;
return 1;
}
}
TestHsa::HsaShutdown();
return 0;
}
+8 -6
ファイルの表示
@@ -116,12 +116,14 @@ export ROCP_INPUT=input1.xml
eval_test "'rocprof' libtool test n-threads" ./test/ctrl
## SPM test
export ROCP_KITER=3
export ROCP_DITER=3
export ROCP_AGENTS=1
export ROCP_THRS=1
export ROCP_INPUT=spm_input.xml
eval_test "libtool test, SPM trace test" ./test/ctrl
# export ROCP_KITER=3
# export ROCP_DITER=3
# export ROCP_AGENTS=1
# export ROCP_THRS=1
# export ROCP_INPUT=spm_input.xml
# export ROCP_SPM=1
# eval_test "libtool test, SPM trace test" ./test/ctrl
# unset ROCP_SPM
## Libtool test, counter sets
# Memcopies tracking
+1 -1
ファイルの表示
@@ -10,7 +10,7 @@
# List of metrics
<metric
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts
></metric>
# SQTT trace with parameters
+1 -1
ファイルの表示
@@ -10,5 +10,5 @@
# List of metrics
<metric
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
name=SQ:4,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts
></metric>