File reorganization
Move the tracer_tool from the 'test' directory to the 'src' directory. Change-Id: I13768b9610cd359f78a66147f0255ab1e4c657e9
This commit is contained in:
+4
-26
@@ -22,21 +22,13 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.0)
|
||||
|
||||
## Set module name and project name.
|
||||
set(ROCTRACER_NAME "roctracer")
|
||||
set(ROCTRACER_TARGET "${ROCTRACER_NAME}")
|
||||
|
||||
string(TOUPPER "${ROCTRACER_NAME}" ROCTRACER_NAME_upper)
|
||||
project(${ROCTRACER_NAME_upper} VERSION 4.1.0)
|
||||
project(ROCTRACER VERSION 4.1.0)
|
||||
|
||||
## Build is not supported on Windows plaform
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "Windows build is not supported.")
|
||||
endif ()
|
||||
|
||||
## Adding default path cmake modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
add_compile_options(-Wall -Werror)
|
||||
@@ -51,6 +43,9 @@ if(NOT DEFINED LIBRARY_TYPE)
|
||||
set(LIBRARY_TYPE SHARED)
|
||||
endif()
|
||||
|
||||
## Set module name
|
||||
string(TOLOWER "${CMAKE_PROJECT_NAME}" ROCTRACER_NAME)
|
||||
|
||||
## Build libraries
|
||||
include(src/CMakeLists.txt)
|
||||
|
||||
@@ -59,23 +54,6 @@ if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
add_subdirectory(test ${PROJECT_BINARY_DIR}/test)
|
||||
endif()
|
||||
|
||||
## pbulic headers
|
||||
set(PUBLIC_HEADERS
|
||||
roctx.h
|
||||
roctracer.h
|
||||
roctracer_ext.h
|
||||
roctracer_hip.h
|
||||
roctracer_hcc.h
|
||||
roctracer_hsa.h
|
||||
roctracer_roctx.h
|
||||
ext/prof_protocol.h
|
||||
)
|
||||
set(GEN_HEADERS
|
||||
hip_ostream_ops.h
|
||||
hsa_prof_str.h
|
||||
hsa_ostream_ops.h
|
||||
)
|
||||
|
||||
if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
|
||||
## Installation and packaging
|
||||
|
||||
@@ -28,7 +28,7 @@ events_count = {}
|
||||
events_order = {}
|
||||
events_order_r = {}
|
||||
trace2info = {}
|
||||
trace2info_filename = 'test/tests_trace_cmp_levels.txt'
|
||||
trace2info_filename = 'test/golden_traces/tests_trace_cmp_levels.txt'
|
||||
|
||||
# Parses trace comparison config file and stores the info in a dictionary
|
||||
def parse_trace_levels(trace_config_filename, check_trace_flag):
|
||||
@@ -151,8 +151,8 @@ def check_trace_status(tracename, verbose, check_trace_flag):
|
||||
print('PASSED!')
|
||||
return 0
|
||||
|
||||
trace = 'test/' + tracename + '.txt'
|
||||
rtrace = tracename + '.txt'
|
||||
trace = 'test/golden_traces/' + tracename + '.txt'
|
||||
rtrace = 'test/out/' + tracename + '.out'
|
||||
if os.path.basename(tracename) in trace2info.keys():
|
||||
(trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, events2ch) = trace2info[os.path.basename(tracename)]
|
||||
trace_level = trace_level.rstrip('\n')
|
||||
|
||||
+1
-1
@@ -324,7 +324,7 @@ class API_DescrParser:
|
||||
|
||||
self.content += '\n'
|
||||
self.content += '#if PROF_API_IMPL\n'
|
||||
self.content += '#include \"core/callback_table.h\"\n';
|
||||
self.content += '#include \"callback_table.h\"\n';
|
||||
self.content += 'namespace roctracer {\n'
|
||||
self.content += 'namespace hsa_support {\n'
|
||||
self.add_section('API callback functions', '', self.gen_callbacks)
|
||||
|
||||
+54
-7
@@ -113,7 +113,24 @@ add_custom_command(
|
||||
|
||||
# Build the ROCtracer library
|
||||
|
||||
file(GLOB ROCTRACER_SOURCES "src/core/*.cpp" "src/util/*.cpp")
|
||||
file(GLOB ROCTRACER_SOURCES "src/roctracer/*.cpp" "src/util/*.cpp")
|
||||
|
||||
set(PUBLIC_HEADERS
|
||||
roctx.h
|
||||
roctracer.h
|
||||
roctracer_ext.h
|
||||
roctracer_hip.h
|
||||
roctracer_hcc.h
|
||||
roctracer_hsa.h
|
||||
roctracer_roctx.h
|
||||
ext/prof_protocol.h
|
||||
)
|
||||
|
||||
set(GEN_HEADERS
|
||||
hip_ostream_ops.h
|
||||
hsa_prof_str.h
|
||||
hsa_ostream_ops.h
|
||||
)
|
||||
|
||||
add_library(roctracer ${LIBRARY_TYPE}
|
||||
${ROCTRACER_SOURCES}
|
||||
@@ -125,7 +142,7 @@ add_library(roctracer ${LIBRARY_TYPE}
|
||||
set_target_properties(roctracer PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
OUTPUT_NAME "roctracer64"
|
||||
LINK_DEPENDS core.exportmap
|
||||
LINK_DEPENDS roctracer.exportmap
|
||||
VERSION ${ROCTRACER_VERSION}
|
||||
SOVERSION ${ROCTRACER_VERSION_MAJOR}
|
||||
)
|
||||
@@ -144,15 +161,15 @@ target_include_directories(roctracer
|
||||
${HIP_INCLUDE_DIRECTORIES} ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
|
||||
PRIVATE
|
||||
${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR}/src
|
||||
${PROJECT_BINARY_DIR}/inc
|
||||
${PROJECT_SOURCE_DIR}/src/roctracer ${PROJECT_BINARY_DIR}/inc
|
||||
)
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/core/exportmap.in core.exportmap @ONLY)
|
||||
target_link_options(roctracer PRIVATE -Wl,--version-script=core.exportmap -Wl,--no-undefined)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/roctracer/exportmap.in roctracer.exportmap @ONLY)
|
||||
target_link_options(roctracer PRIVATE -Wl,--version-script=roctracer.exportmap -Wl,--no-undefined)
|
||||
|
||||
target_link_libraries(roctracer PRIVATE hsa-runtime64::hsa-runtime64 Threads::Threads dl)
|
||||
|
||||
install(TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib)
|
||||
install(TARGETS roctracer LIBRARY DESTINATION lib)
|
||||
foreach(header ${PUBLIC_HEADERS})
|
||||
get_filename_component(header_subdir ${header} DIRECTORY)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION include/${ROCTRACER_NAME}/${header_subdir})
|
||||
@@ -178,7 +195,7 @@ set_target_properties(roctx PROPERTIES
|
||||
)
|
||||
|
||||
target_include_directories(roctx PRIVATE
|
||||
${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc
|
||||
${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR}/src/roctracer
|
||||
${PROJECT_SOURCE_DIR}/src
|
||||
)
|
||||
|
||||
@@ -186,3 +203,33 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/roctx/exportmap.in roctx.exportma
|
||||
target_link_options(roctx PRIVATE -Wl,--version-script=roctx.exportmap -Wl,--no-undefined)
|
||||
|
||||
install(TARGETS roctx LIBRARY DESTINATION lib)
|
||||
|
||||
## Build the tracer_tool library
|
||||
|
||||
if (${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
|
||||
file(GLOB TRACER_TOOL_SOURCES "src/tracer_tool/*.cpp" "src/util/*.cpp")
|
||||
|
||||
add_library(roctracer_tool SHARED ${TRACER_TOOL_SOURCES})
|
||||
|
||||
set_target_properties(roctracer_tool PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/tracer_tool/exportmap
|
||||
)
|
||||
|
||||
target_compile_definitions(roctracer_tool PRIVATE
|
||||
HIP_PROF_HIP_API_STRING=1
|
||||
__HIP_PLATFORM_HCC__=1
|
||||
)
|
||||
|
||||
target_include_directories(roctracer_tool PRIVATE
|
||||
${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR}/src/roctracer
|
||||
${PROJECT_SOURCE_DIR}/src ${HIP_INCLUDE_DIRECTORIES} ${GEN_INC_DIR}
|
||||
)
|
||||
|
||||
target_link_libraries(roctracer_tool roctracer hsa-runtime64::hsa-runtime64 Threads::Threads atomic dl)
|
||||
target_link_options(roctracer_tool PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/tracer_tool/exportmap -Wl,--no-undefined)
|
||||
|
||||
install(TARGETS roctracer_tool LIBRARY DESTINATION lib/${ROCTRACER_NAME})
|
||||
|
||||
endif()
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#ifndef CALLBACK_TABLE_H_
|
||||
#define CALLBACK_TABLE_H_
|
||||
|
||||
#include <ext/prof_protocol.h>
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
@@ -18,11 +18,11 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "inc/roctracer.h"
|
||||
#include "inc/roctracer_hip.h"
|
||||
#include "inc/roctracer_ext.h"
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#include "inc/roctracer_hsa.h"
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hip.h"
|
||||
#include "roctracer_ext.h"
|
||||
#include "roctracer_roctx.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
@@ -36,10 +36,10 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "core/journal.h"
|
||||
#include "core/loader.h"
|
||||
#include "core/memory_pool.h"
|
||||
#include "core/tracker.h"
|
||||
#include "journal.h"
|
||||
#include "loader.h"
|
||||
#include "memory_pool.h"
|
||||
#include "tracker.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
+4
-4
@@ -18,13 +18,13 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "inc/roctx.h"
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#include "roctx.h"
|
||||
#include "roctracer_roctx.h"
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "inc/ext/prof_protocol.h"
|
||||
#include "core/callback_table.h"
|
||||
#include "callback_table.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef EVT_STATS_H_
|
||||
#define EVT_STATS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
template <class evt_id_t, class evt_weight_t> class EvtStatsT {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef uint64_t evt_count_t;
|
||||
typedef double evt_avr_t;
|
||||
struct evt_record_t {
|
||||
uint64_t count;
|
||||
evt_avr_t avr;
|
||||
evt_record_t() : count(0), avr(0) {}
|
||||
};
|
||||
typedef typename std::map<evt_id_t, evt_record_t> map_t;
|
||||
typedef typename std::map<evt_id_t, const char*> labels_t;
|
||||
|
||||
// Comparison function
|
||||
struct cmpfun {
|
||||
template <typename T> bool operator()(const T& a, const T& b) const {
|
||||
return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first;
|
||||
}
|
||||
};
|
||||
|
||||
inline void add_event(evt_id_t id, evt_weight_t weight) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
// printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout);
|
||||
|
||||
evt_record_t& rec = map_[id];
|
||||
const evt_count_t prev_count = rec.count;
|
||||
const evt_count_t new_count = prev_count + 1;
|
||||
const evt_avr_t prev_avr = rec.avr;
|
||||
const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count;
|
||||
|
||||
rec.count = new_count;
|
||||
rec.avr = new_avr;
|
||||
}
|
||||
|
||||
void dump() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
fprintf(stdout, "Dumping %s\n", path_);
|
||||
fflush(stdout);
|
||||
|
||||
typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
|
||||
set_t s_(map_.begin(), map_.end());
|
||||
|
||||
uint64_t index = 0;
|
||||
for (auto& e : s_) {
|
||||
const evt_id_t id = e.first;
|
||||
const char* label = get_label(id);
|
||||
std::ostringstream oss;
|
||||
oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr)
|
||||
<< "," << (uint64_t)(e.second.count * e.second.avr);
|
||||
fprintf(fdes_, "%s\n", oss.str().c_str());
|
||||
index += 1;
|
||||
}
|
||||
|
||||
fclose(fdes_);
|
||||
}
|
||||
|
||||
const char* get_label(const uint32_t& id) {
|
||||
auto ret = labels_.insert({id, NULL});
|
||||
const char* label = ret.first->second;
|
||||
return label;
|
||||
}
|
||||
const char* get_label(const char* id) { return id; }
|
||||
const char* get_label(const std::string& id) { return id.c_str(); }
|
||||
|
||||
void set_label(evt_id_t id, const char* label) {
|
||||
// printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout);
|
||||
labels_[id] = label;
|
||||
}
|
||||
|
||||
EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
|
||||
// printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout);
|
||||
fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
|
||||
}
|
||||
|
||||
private:
|
||||
mutex_t mutex_;
|
||||
map_t map_;
|
||||
labels_t labels_;
|
||||
FILE* fdes_;
|
||||
const char* path_;
|
||||
};
|
||||
|
||||
typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
|
||||
|
||||
#endif // EVT_STATS_H_
|
||||
@@ -37,15 +37,15 @@
|
||||
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include "src/util/exception.h"
|
||||
#include <roctracer_roctx.h>
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
#include "src/core/loader.h"
|
||||
#include "test/tool/trace_buffer.h"
|
||||
#include "util/evt_stats.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/xml.h"
|
||||
#include "loader.h"
|
||||
#include "trace_buffer.h"
|
||||
#include "evt_stats.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
@@ -218,7 +218,8 @@ void flush_thr_fun() {
|
||||
while (!stop_flush_thread) {
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
std::this_thread::sleep_until(std::chrono::steady_clock::now() + std::chrono::microseconds(control_flush_us));
|
||||
std::this_thread::sleep_until(std::chrono::steady_clock::now() +
|
||||
std::chrono::microseconds(control_flush_us));
|
||||
}
|
||||
}
|
||||
|
||||
+457
@@ -0,0 +1,457 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_XML_H_
|
||||
#define TEST_UTIL_XML_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace xml {
|
||||
|
||||
class Xml {
|
||||
public:
|
||||
typedef std::vector<char> token_t;
|
||||
|
||||
struct level_t;
|
||||
typedef std::vector<level_t*> nodes_t;
|
||||
typedef std::map<std::string, std::string> opts_t;
|
||||
struct level_t {
|
||||
std::string tag;
|
||||
nodes_t nodes;
|
||||
opts_t opts;
|
||||
};
|
||||
typedef std::vector<level_t*> nodes_vec_t;
|
||||
typedef std::map<std::string, nodes_vec_t> map_t;
|
||||
|
||||
enum { DECL_STATE, BODY_STATE };
|
||||
|
||||
static Xml* Create(const std::string& file_name, const Xml* obj = NULL) {
|
||||
Xml* xml = new Xml(file_name, obj);
|
||||
if (xml != NULL) {
|
||||
if (xml->Init() == false) {
|
||||
delete xml;
|
||||
xml = NULL;
|
||||
} else {
|
||||
const std::size_t pos = file_name.rfind('/');
|
||||
const std::string path = (pos != std::string::npos) ? file_name.substr(0, pos + 1) : "";
|
||||
|
||||
xml->PreProcess();
|
||||
nodes_t incl_nodes;
|
||||
for (auto* node : xml->GetNodes("top.include")) {
|
||||
if (node->opts.find("touch") == node->opts.end()) {
|
||||
node->opts["touch"] = "";
|
||||
incl_nodes.push_back(node);
|
||||
}
|
||||
}
|
||||
for (auto* incl : incl_nodes) {
|
||||
const std::string& incl_name = path + incl->opts["file"];
|
||||
Xml* ixml = Create(incl_name, xml);
|
||||
if (ixml == NULL) {
|
||||
delete xml;
|
||||
xml = NULL;
|
||||
break;
|
||||
} else {
|
||||
delete ixml;
|
||||
}
|
||||
}
|
||||
if (xml) {
|
||||
xml->Process();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
static void Destroy(Xml* xml) { delete xml; }
|
||||
|
||||
std::string GetName() { return file_name_; }
|
||||
|
||||
void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) {
|
||||
const std::size_t pos = full_tag.rfind('.');
|
||||
const std::size_t pos1 = (pos == std::string::npos) ? 0 : pos + 1;
|
||||
const std::string level_tag = full_tag.substr(pos1);
|
||||
level_t* level = new level_t;
|
||||
(*map_)[full_tag].push_back(level);
|
||||
level->tag = level_tag;
|
||||
level->opts["name"] = name;
|
||||
level->opts["expr"] = expr;
|
||||
}
|
||||
|
||||
void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) {
|
||||
std::ostringstream oss;
|
||||
oss << val;
|
||||
AddExpr(full_tag, name, oss.str());
|
||||
}
|
||||
|
||||
nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; }
|
||||
|
||||
template <class F> F ForEach(const F& f_i) {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
template <class F> F ForEach(const F& f_i) const {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
struct print_func {
|
||||
bool fun(const std::string& global_tag, level_t* node) {
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
void Print() const {
|
||||
std::cout << "XML file '" << file_name_ << "':" << std::endl;
|
||||
ForEach(print_func());
|
||||
}
|
||||
|
||||
private:
|
||||
Xml(const std::string& file_name, const Xml* obj)
|
||||
: file_name_(file_name),
|
||||
file_line_(0),
|
||||
data_size_(0),
|
||||
index_(0),
|
||||
state_(BODY_STATE),
|
||||
comment_(false),
|
||||
included_(false),
|
||||
level_(NULL),
|
||||
map_(NULL) {
|
||||
if (obj != NULL) {
|
||||
map_ = obj->map_;
|
||||
level_ = obj->level_;
|
||||
included_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct delete_func {
|
||||
bool fun(const std::string&, level_t* node) {
|
||||
delete node;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
~Xml() {
|
||||
if (included_ == false) {
|
||||
ForEach(delete_func());
|
||||
delete map_;
|
||||
}
|
||||
}
|
||||
|
||||
bool Init() {
|
||||
fd_ = open(file_name_.c_str(), O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
// perror((std::string("open XML file ") + file_name_).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (map_ == NULL) {
|
||||
map_ = new map_t;
|
||||
if (map_ == NULL) return false;
|
||||
AddLevel("top");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PreProcess() {
|
||||
uint32_t ind = 0;
|
||||
char buf[kBufSize];
|
||||
bool error = false;
|
||||
|
||||
while (1) {
|
||||
const uint32_t pos = lseek(fd_, 0, SEEK_CUR);
|
||||
uint32_t size = read(fd_, buf, kBufSize);
|
||||
if (size <= 0) break;
|
||||
buf[size - 1] = '\0';
|
||||
|
||||
if (strncmp(buf, "#include \"", 10) == 0) {
|
||||
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
size = ind;
|
||||
lseek(fd_, pos + ind + 1, SEEK_SET);
|
||||
|
||||
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
|
||||
AddLevel("include");
|
||||
AddOption("file", &buf[10]);
|
||||
UpLevel();
|
||||
}
|
||||
}
|
||||
|
||||
if (error) {
|
||||
fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
lseek(fd_, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
void Process() {
|
||||
token_t remainder;
|
||||
|
||||
while (1) {
|
||||
token_t token = (remainder.size()) ? remainder : NextToken();
|
||||
remainder.clear();
|
||||
|
||||
// token_t token1 = token;
|
||||
// token1.push_back('\0');
|
||||
// std::cout << "> " << &token1[0] << std::endl;
|
||||
|
||||
// End of file
|
||||
if (token.size() == 0) break;
|
||||
|
||||
switch (state_) {
|
||||
case BODY_STATE:
|
||||
if (token[0] == '<') {
|
||||
bool node_begin = true;
|
||||
unsigned ind = 1;
|
||||
if (token[1] == '/') {
|
||||
node_begin = false;
|
||||
++ind;
|
||||
}
|
||||
|
||||
unsigned i = ind;
|
||||
while (i < token.size()) {
|
||||
if (token[i] == '>') break;
|
||||
++i;
|
||||
}
|
||||
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
|
||||
if (i == token.size()) {
|
||||
if (node_begin)
|
||||
state_ = DECL_STATE;
|
||||
else
|
||||
BadFormat(token);
|
||||
token.push_back('\0');
|
||||
} else {
|
||||
token[i] = '\0';
|
||||
}
|
||||
|
||||
const char* tag = &token[ind];
|
||||
if (node_begin) {
|
||||
AddLevel(tag);
|
||||
} else {
|
||||
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) {
|
||||
token.back() = '>';
|
||||
BadFormat(token);
|
||||
}
|
||||
UpLevel();
|
||||
}
|
||||
} else {
|
||||
BadFormat(token);
|
||||
}
|
||||
break;
|
||||
case DECL_STATE:
|
||||
if (token[0] == '>') {
|
||||
state_ = BODY_STATE;
|
||||
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
continue;
|
||||
} else {
|
||||
token.push_back('\0');
|
||||
unsigned j = 0;
|
||||
for (j = 0; j < token.size(); ++j)
|
||||
if (token[j] == '=') break;
|
||||
if (j == token.size()) BadFormat(token);
|
||||
token[j] = '\0';
|
||||
const char* key = &token[0];
|
||||
const char* value = &token[j + 1];
|
||||
AddOption(key, value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "XML parser error: wrong state: " << state_ << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpaceCheck() const {
|
||||
bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t'));
|
||||
return cond;
|
||||
}
|
||||
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
buffer_[index_] = ' ';
|
||||
++file_line_;
|
||||
found = true;
|
||||
comment_ = false;
|
||||
} else if (comment_ || (buffer_[index_] == '#')) {
|
||||
found = true;
|
||||
comment_ = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
token_t NextToken() {
|
||||
token_t token;
|
||||
bool in_string = false;
|
||||
bool special_symb = false;
|
||||
|
||||
while (1) {
|
||||
if (data_size_ == 0) {
|
||||
data_size_ = read(fd_, buffer_, kBufSize);
|
||||
if (data_size_ <= 0) break;
|
||||
}
|
||||
|
||||
if (token.empty()) {
|
||||
while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) {
|
||||
++index_;
|
||||
}
|
||||
}
|
||||
while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) {
|
||||
const char symb = buffer_[index_];
|
||||
bool skip_symb = false;
|
||||
|
||||
switch (symb) {
|
||||
case '\\':
|
||||
if (special_symb) {
|
||||
special_symb = false;
|
||||
} else {
|
||||
special_symb = true;
|
||||
skip_symb = true;
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
if (special_symb) {
|
||||
special_symb = false;
|
||||
} else {
|
||||
in_string = !in_string;
|
||||
if (!in_string) {
|
||||
buffer_[index_] = ' ';
|
||||
--index_;
|
||||
}
|
||||
skip_symb = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!skip_symb) token.push_back(symb);
|
||||
++index_;
|
||||
}
|
||||
|
||||
if (index_ == data_size_) {
|
||||
index_ = 0;
|
||||
data_size_ = 0;
|
||||
} else {
|
||||
if (special_symb || in_string) BadFormat(token);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void BadFormat(token_t token) {
|
||||
token.push_back('\0');
|
||||
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '"
|
||||
<< &token[0] << "'" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void AddLevel(const std::string& tag) {
|
||||
level_t* level = new level_t;
|
||||
level->tag = tag;
|
||||
if (level_) {
|
||||
level_->nodes.push_back(level);
|
||||
stack_.push_back(level_);
|
||||
}
|
||||
level_ = level;
|
||||
|
||||
std::string global_tag;
|
||||
for (level_t* level : stack_) {
|
||||
global_tag += level->tag + ".";
|
||||
}
|
||||
global_tag += tag;
|
||||
(*map_)[global_tag].push_back(level_);
|
||||
}
|
||||
|
||||
void UpLevel() {
|
||||
level_ = stack_.back();
|
||||
stack_.pop_back();
|
||||
}
|
||||
|
||||
std::string CurrentLevel() const { return level_->tag; }
|
||||
|
||||
void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; }
|
||||
|
||||
const std::string file_name_;
|
||||
unsigned file_line_;
|
||||
int fd_;
|
||||
|
||||
static const size_t kBufSize = 256;
|
||||
char buffer_[kBufSize];
|
||||
|
||||
unsigned data_size_;
|
||||
unsigned index_;
|
||||
unsigned state_;
|
||||
bool comment_;
|
||||
std::vector<level_t*> stack_;
|
||||
bool included_;
|
||||
level_t* level_;
|
||||
map_t* map_;
|
||||
};
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // TEST_UTIL_XML_H_
|
||||
+9
-22
@@ -32,16 +32,16 @@ set(GEN_INC_DIR ${PROJECT_BINARY_DIR}/inc)
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "/opt/rocm/hip/cmake")
|
||||
find_package(HIP REQUIRED MODULE)
|
||||
|
||||
set_source_files_properties(MatrixTranspose/MatrixTranspose.cpp MatrixTranspose_test/MatrixTranspose.cpp
|
||||
set_source_files_properties(hip/MatrixTranspose.cpp app/MatrixTranspose_test.cpp
|
||||
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
|
||||
hip_add_executable(MatrixTranspose EXCLUDE_FROM_ALL MatrixTranspose/MatrixTranspose.cpp)
|
||||
hip_add_executable(MatrixTranspose EXCLUDE_FROM_ALL hip/MatrixTranspose.cpp)
|
||||
target_include_directories(MatrixTranspose PRIVATE ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(MatrixTranspose PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose)
|
||||
|
||||
function(build_matrix_transpose_test OUTPUT_FILE DEFINITIONS)
|
||||
hip_add_executable(${OUTPUT_FILE} EXCLUDE_FROM_ALL MatrixTranspose_test/MatrixTranspose.cpp)
|
||||
hip_add_executable(${OUTPUT_FILE} EXCLUDE_FROM_ALL app/MatrixTranspose_test.cpp)
|
||||
target_compile_definitions(${OUTPUT_FILE} PRIVATE ITERATIONS=100 HIP_TEST=1 ${DEFINITIONS})
|
||||
target_include_directories(${OUTPUT_FILE} PRIVATE ${PROJECT_SOURCE_DIR}/inc ${GEN_INC_DIR})
|
||||
target_link_libraries(${OUTPUT_FILE} PRIVATE roctracer roctx)
|
||||
@@ -54,7 +54,7 @@ build_matrix_transpose_test(MatrixTranspose_hipaact_test HIP_API_ACTIVITY_ON=1)
|
||||
build_matrix_transpose_test(MatrixTranspose_mgpu MGPU_TEST=1)
|
||||
|
||||
add_custom_command(OUTPUT MatrixTranspose.c
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/MatrixTranspose_test/MatrixTranspose.cpp MatrixTranspose.c)
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/app/MatrixTranspose_test.cpp MatrixTranspose.c)
|
||||
|
||||
hip_add_executable(MatrixTranspose_ctest EXCLUDE_FROM_ALL MatrixTranspose.c)
|
||||
target_compile_definitions(MatrixTranspose_ctest PRIVATE HIP_TEST=0 __HIP_PLATFORM_HCC__)
|
||||
@@ -62,27 +62,14 @@ target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/i
|
||||
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose_ctest)
|
||||
|
||||
file(GLOB files "${CMAKE_CURRENT_SOURCE_DIR}/golden_traces/*_trace.txt")
|
||||
file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_cmp_levels.txt" "golden_traces/*_trace.txt")
|
||||
foreach(file ${files})
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${file} ${PROJECT_BINARY_DIR}/test/)
|
||||
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
|
||||
endforeach()
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/golden_traces/tests_trace_cmp_levels.txt ${PROJECT_BINARY_DIR}/test/)
|
||||
|
||||
## Build HSA test
|
||||
add_subdirectory(hsa/test ${PROJECT_BINARY_DIR}/test/hsa)
|
||||
|
||||
if(DEFINED ROCTRACER_TARGET)
|
||||
## Build the tracer_tool library
|
||||
file(GLOB TRACER_TOOL_SOURCES "tool/*.cpp" "${PROJECT_SOURCE_DIR}/src/util/*.cpp")
|
||||
add_library(roctracer_tool SHARED ${TRACER_TOOL_SOURCES})
|
||||
target_compile_definitions(roctracer_tool PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__)
|
||||
target_include_directories(roctracer_tool PRIVATE hsa/test ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${HIP_INCLUDE_DIRECTORIES} ${GEN_INC_DIR})
|
||||
target_link_libraries(roctracer_tool ${ROCTRACER_TARGET} hsa-runtime64::hsa-runtime64 Threads::Threads atomic dl)
|
||||
set_target_properties(roctracer_tool PROPERTIES CXX_VISIBILITY_PRESET hidden LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tool/exportmap)
|
||||
target_link_options(roctracer_tool PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/tool/exportmap -Wl,--no-undefined)
|
||||
install(TARGETS roctracer_tool LIBRARY DESTINATION lib/${ROCTRACER_NAME})
|
||||
endif ()
|
||||
|
||||
## Build hsaco_test.cpp referenc test
|
||||
add_library(hsaco_test SHARED EXCLUDE_FROM_ALL app/hsaco_test.cpp)
|
||||
target_compile_definitions(hsaco_test PRIVATE AMD_INTERNAL_BUILD)
|
||||
@@ -97,12 +84,12 @@ add_dependencies(mytest codeobj_test)
|
||||
|
||||
## Build the trace_buffer test
|
||||
add_executable(trace_buffer EXCLUDE_FROM_ALL directed/trace_buffer.cpp)
|
||||
target_include_directories(trace_buffer PRIVATE ${PROJECT_SOURCE_DIR}/test/tool)
|
||||
target_include_directories(trace_buffer PRIVATE ${PROJECT_SOURCE_DIR}/src/tracer_tool)
|
||||
target_link_libraries(trace_buffer Threads::Threads atomic)
|
||||
add_dependencies(mytest trace_buffer)
|
||||
|
||||
## copying run script
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/run.sh ${PROJECT_BINARY_DIR})
|
||||
configure_file(run.sh ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink run.sh ${PROJECT_BINARY_DIR}/run_ci.sh)
|
||||
## copying tests output check script
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/script/check_trace.py ${PROJECT_BINARY_DIR}/test/.)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/script/check_trace.py ${PROJECT_BINARY_DIR}/test/check_trace.py COPYONLY)
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
## Writing first HIP program ###
|
||||
|
||||
This tutorial shows how to get write simple HIP application. We will write the simplest Matrix Transpose program.
|
||||
|
||||
## HIP Introduction:
|
||||
|
||||
HIP is a C++ runtime API and kernel language that allows developers to create portable applications that can run on AMD and other GPU’s. Our goal was to rise above the lowest-common-denominator paths and deliver a solution that allows you, the developer, to use essential hardware features and maximize your application’s performance on GPU hardware.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequiste knowledge:
|
||||
|
||||
Programmers familiar with CUDA, OpenCL will be able to quickly learn and start coding with the HIP API. In case you are not, don't worry. You choose to start with the best one. We'll be explaining everything assuming you are completely new to gpgpu programming.
|
||||
|
||||
## Simple Matrix Transpose
|
||||
|
||||
Here is simple example showing how to write your first program in HIP.
|
||||
In order to use the HIP framework, we need to add the "hip_runtime.h" header file. SInce its c++ api you can add any header file you have been using earlier while writing your c/c++ program. For gpgpu programming, we have host(microprocessor) and the device(gpu).
|
||||
|
||||
## Device-side code
|
||||
We will work on device side code first, Here is simple example showing a snippet of HIP device side code:
|
||||
|
||||
`__global__ void matrixTranspose(float *out, `
|
||||
` float *in, `
|
||||
` const int width, `
|
||||
` const int height) `
|
||||
`{ `
|
||||
` int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; `
|
||||
` int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; `
|
||||
` `
|
||||
` out[y * width + x] = in[x * height + y]; `
|
||||
`} `
|
||||
|
||||
`__global__` keyword is the Function-Type Qualifiers, it is used with functions that are executed on device and are called/launched from the hosts.
|
||||
other function-type qualifiers are:
|
||||
`__device__` functions are Executed on the device and Called from the device only
|
||||
`__host__` functions are Executed on the host and Called from the host
|
||||
|
||||
`__host__` can combine with `__device__`, in which case the function compiles for both the host and device. These functions cannot use the HIP grid coordinate functions (for example, "hipThreadIdx_x", will talk about it latter). A possible workaround is to pass the necessary coordinate info as an argument to the function.
|
||||
`__host__` cannot combine with `__global__`.
|
||||
|
||||
`__global__` functions are often referred to as *kernels*, and calling one is termed *launching the kernel*.
|
||||
|
||||
Next keyword is `void`. HIP `__global__` functions must have a `void` return type. Global functions require the caller to specify an "execution configuration" that includes the grid and block dimensions. The execution configuration can also include other information for the launch, such as the amount of additional shared memory to allocate and the stream where the kernel should execute.
|
||||
|
||||
The kernel function begins with
|
||||
` int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;`
|
||||
` int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;`
|
||||
here the keyword hipBlockIdx_x, hipBlockIdx_y and hipBlockIdx_z(not used here) are the built-in functions to identify the threads in a block. The keyword hipBlockDim_x, hipBlockDim_y and hipBlockDim_z(not used here) are to identify the dimensions of the block.
|
||||
|
||||
We are familiar with rest of the code on device-side.
|
||||
|
||||
## Host-side code
|
||||
|
||||
Now, we'll see how to call the kernel from the host. Inside the main() function, we first defined the pointers(for both, the host-side as well as device). The declaration of device pointer is similar to that of the host. Next, we have `hipDeviceProp_t`, it is the pre-defined struct for hip device properties. This is followed by `hipGetDeviceProperties(&devProp, 0)` It is used to extract the device information. The first parameter is the struct, second parameter is the device number to get properties for. Next line print the name of the device.
|
||||
|
||||
We allocated memory to the Matrix on host side by using malloc and initiallized it. While in order to allocate memory on device side we will be using `hipMalloc`, it's quiet similar to that of malloc instruction. After this, we will copy the data to the allocated memory on device-side using `hipMemcpy`.
|
||||
` hipMemcpy(gpuMatrix, Matrix, NUM*sizeof(float), hipMemcpyHostToDevice);`
|
||||
here the first parameter is the destination pointer, second is the source pointer, third is the size of memory copy and the last specify the direction on memory copy(which is in this case froom host to device). While in order to transfer memory from device to host, use `hipMemcpyDeviceToHost` and for device to device memory copy use `hipMemcpyDeviceToDevice`.
|
||||
|
||||
Now, we'll see how to launch the kernel.
|
||||
` hipLaunchKernelGGL(matrixTranspose, `
|
||||
` dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), `
|
||||
` dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), `
|
||||
` 0, 0, `
|
||||
` gpuTransposeMatrix , gpuMatrix, WIDTH ,HEIGHT); `
|
||||
|
||||
HIP introduces a standard C++ calling convention to pass the execution configuration to the kernel (this convention replaces the `Cuda <<< >>>` syntax). In HIP,
|
||||
- Kernels launch with the `"hipLaunchKernelGGL"` function
|
||||
- The first five parameters to hipLaunchKernelGGL are the following:
|
||||
- **symbol kernelName**: the name of the kernel to launch. To support template kernels which contains "," use the HIP_KERNEL_NAME macro. In current application it's "matrixTranspose".
|
||||
- **dim3 gridDim**: 3D-grid dimensions specifying the number of blocks to launch. In MatrixTranspose sample, it's "dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y)".
|
||||
- **dim3 blockDim**: 3D-block dimensions specifying the number of threads in each block.In MatrixTranspose sample, it's "dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y)".
|
||||
- **size_t dynamicShared**: amount of additional shared memory to allocate when launching the kernel. In MatrixTranspose sample, it's '0'.
|
||||
- **hipStream_t**: stream where the kernel should execute. A value of 0 corresponds to the NULL stream.In MatrixTranspose sample, it's '0'.
|
||||
- Kernel arguments follow these first five parameters. Here, these are "gpuTransposeMatrix , gpuMatrix, WIDTH ,HEIGHT".
|
||||
|
||||
Next, we'll copy the computed values/data back to the device using the `hipMemcpy`. Here the last parameter will be `hipMemcpyDeviceToHost`
|
||||
|
||||
After, copying the data from device to memory, we will verify it with the one we computed with the cpu reference funtion.
|
||||
|
||||
Finally, we will free the memory allocated earlier by using free() for host while for devices we will use `hipFree`.
|
||||
|
||||
## How to build and run:
|
||||
Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [hipify-clang](https://github.com/ROCm-Developer-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
@@ -1,101 +0,0 @@
|
||||
## Writing first HIP program ###
|
||||
|
||||
This tutorial shows how to get write simple HIP application. We will write the simplest Matrix Transpose program.
|
||||
|
||||
## HIP Introduction:
|
||||
|
||||
HIP is a C++ runtime API and kernel language that allows developers to create portable applications that can run on AMD and other GPU’s. Our goal was to rise above the lowest-common-denominator paths and deliver a solution that allows you, the developer, to use essential hardware features and maximize your application’s performance on GPU hardware.
|
||||
|
||||
## Requirement:
|
||||
For hardware requirement and software installation [Installation](https://github.com/ROCm-Developer-Tools/HIP/INSTALL.md)
|
||||
|
||||
## prerequiste knowledge:
|
||||
|
||||
Programmers familiar with CUDA, OpenCL will be able to quickly learn and start coding with the HIP API. In case you are not, don't worry. You choose to start with the best one. We'll be explaining everything assuming you are completely new to gpgpu programming.
|
||||
|
||||
## Simple Matrix Transpose
|
||||
|
||||
Here is simple example showing how to write your first program in HIP.
|
||||
In order to use the HIP framework, we need to add the "hip_runtime.h" header file. SInce its c++ api you can add any header file you have been using earlier while writing your c/c++ program. For gpgpu programming, we have host(microprocessor) and the device(gpu).
|
||||
|
||||
## Device-side code
|
||||
We will work on device side code first, Here is simple example showing a snippet of HIP device side code:
|
||||
|
||||
```
|
||||
__global__ void matrixTranspose(float *out,
|
||||
float *in,
|
||||
const int width,
|
||||
const int height)
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * height + y];
|
||||
}
|
||||
```
|
||||
|
||||
`__global__` keyword is the Function-Type Qualifiers, it is used with functions that are executed on device and are called/launched from the hosts.
|
||||
other function-type qualifiers are:
|
||||
`__device__` functions are Executed on the device and Called from the device only
|
||||
`__host__` functions are Executed on the host and Called from the host
|
||||
|
||||
`__host__` can combine with `__device__`, in which case the function compiles for both the host and device. These functions cannot use the HIP grid coordinate functions (for example, "hipThreadIdx_x", will talk about it latter). A possible workaround is to pass the necessary coordinate info as an argument to the function.
|
||||
`__host__` cannot combine with `__global__`.
|
||||
|
||||
`__global__` functions are often referred to as *kernels*, and calling one is termed *launching the kernel*.
|
||||
|
||||
Next keyword is `void`. HIP `__global__` functions must have a `void` return type. Global functions require the caller to specify an "execution configuration" that includes the grid and block dimensions. The execution configuration can also include other information for the launch, such as the amount of additional shared memory to allocate and the stream where the kernel should execute.
|
||||
|
||||
The kernel function begins with
|
||||
` int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;`
|
||||
` int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;`
|
||||
here the keyword hipBlockIdx_x, hipBlockIdx_y and hipBlockIdx_z(not used here) are the built-in functions to identify the threads in a block. The keyword hipBlockDim_x, hipBlockDim_y and hipBlockDim_z(not used here) are to identify the dimensions of the block.
|
||||
|
||||
We are familiar with rest of the code on device-side.
|
||||
|
||||
## Host-side code
|
||||
|
||||
Now, we'll see how to call the kernel from the host. Inside the main() function, we first defined the pointers(for both, the host-side as well as device). The declaration of device pointer is similar to that of the host. Next, we have `hipDeviceProp_t`, it is the pre-defined struct for hip device properties. This is followed by `hipGetDeviceProperties(&devProp, 0)` It is used to extract the device information. The first parameter is the struct, second parameter is the device number to get properties for. Next line print the name of the device.
|
||||
|
||||
We allocated memory to the Matrix on host side by using malloc and initiallized it. While in order to allocate memory on device side we will be using `hipMalloc`, it's quiet similar to that of malloc instruction. After this, we will copy the data to the allocated memory on device-side using `hipMemcpy`.
|
||||
` hipMemcpy(gpuMatrix, Matrix, NUM*sizeof(float), hipMemcpyHostToDevice);`
|
||||
here the first parameter is the destination pointer, second is the source pointer, third is the size of memory copy and the last specify the direction on memory copy(which is in this case froom host to device). While in order to transfer memory from device to host, use `hipMemcpyDeviceToHost` and for device to device memory copy use `hipMemcpyDeviceToDevice`.
|
||||
|
||||
Now, we'll see how to launch the kernel.
|
||||
```
|
||||
hipLaunchKernelGGL(matrixTranspose,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
gpuTransposeMatrix , gpuMatrix, WIDTH ,HEIGHT);
|
||||
```
|
||||
|
||||
HIP introduces a standard C++ calling convention to pass the execution configuration to the kernel (this convention replaces the `Cuda <<< >>>` syntax). In HIP,
|
||||
- Kernels launch with the `"hipLaunchKernelGGL"` function
|
||||
- The first five parameters to hipLaunchKernelGGL are the following:
|
||||
- **symbol kernelName**: the name of the kernel to launch. To support template kernels which contains "," use the HIP_KERNEL_NAME macro. In current application it's "matrixTranspose".
|
||||
- **dim3 gridDim**: 3D-grid dimensions specifying the number of blocks to launch. In MatrixTranspose sample, it's "dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y)".
|
||||
- **dim3 blockDim**: 3D-block dimensions specifying the number of threads in each block.In MatrixTranspose sample, it's "dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y)".
|
||||
- **size_t dynamicShared**: amount of additional shared memory to allocate when launching the kernel. In MatrixTranspose sample, it's '0'.
|
||||
- **hipStream_t**: stream where the kernel should execute. A value of 0 corresponds to the NULL stream.In MatrixTranspose sample, it's '0'.
|
||||
- Kernel arguments follow these first five parameters. Here, these are "gpuTransposeMatrix , gpuMatrix, WIDTH ,HEIGHT".
|
||||
|
||||
Next, we'll copy the computed values/data back to the device using the `hipMemcpy`. Here the last parameter will be `hipMemcpyDeviceToHost`
|
||||
|
||||
After, copying the data from device to memory, we will verify it with the one we computed with the cpu reference funtion.
|
||||
|
||||
Finally, we will free the memory allocated earlier by using free() for host while for devices we will use `hipFree`.
|
||||
|
||||
## How to build and run:
|
||||
Use the make command and execute it using ./exe
|
||||
Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia.
|
||||
|
||||
## More Info:
|
||||
- [HIP FAQ](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_faq.md)
|
||||
- [HIP Kernel Language](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [hipify-clang](https://github.com/ROCm-Developer-Tools/HIP/hipify-clang/README.md)
|
||||
- [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/CONTRIBUTING.md)
|
||||
- [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/RELEASE.md)
|
||||
+9
-9
@@ -46,7 +46,7 @@ if [ -z "$ROCTRACER_LIB_PATH" ] ; then
|
||||
ROCTRACER_LIB_PATH="."
|
||||
fi
|
||||
if [ -z "$ROCTRACER_TOOL_PATH" ] ; then
|
||||
ROCTRACER_TOOL_PATH="./test"
|
||||
ROCTRACER_TOOL_PATH="."
|
||||
fi
|
||||
|
||||
# test filter input
|
||||
@@ -74,19 +74,19 @@ eval_test() {
|
||||
label=$1
|
||||
cmdline=$2
|
||||
test_name=$3
|
||||
test_trace=$test_name.txt
|
||||
|
||||
if [ $test_filter = -1 -o $test_filter = $test_number ] ; then
|
||||
echo "test $test_number: $test_name \"$label\""
|
||||
echo "CMD: \"$cmdline\""
|
||||
mkdir -p test/out
|
||||
test_runnum=$((test_runnum + 1))
|
||||
eval "$cmdline" 1>$test_trace 2>$test_name.err
|
||||
eval "$cmdline" 1>test/out/$test_name.out 2>test/out/$test_name.err
|
||||
is_failed=$?
|
||||
if [ $is_failed != 0 ] ; then
|
||||
echo "--- stdout ---"
|
||||
cat $test_trace
|
||||
cat test/out/$test_name.out
|
||||
echo "--- stderr ---"
|
||||
cat $test_name.err
|
||||
cat test/out/$test_name.err
|
||||
fi
|
||||
if [ $IS_CI = 1 ] ; then
|
||||
is_failed=0;
|
||||
@@ -138,8 +138,8 @@ eval_test "tool period test" "ROCP_CTRL_RATE=10:50000:500000 ./test/MatrixTransp
|
||||
eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" MatrixTranspose_hip_flush_trace
|
||||
|
||||
#API records filtering
|
||||
echo "<trace name=\"HIP\"><parameters api=\"hipFree, hipMalloc, hipMemcpy\"></parameters></trace>" > input.xml
|
||||
export ROCP_INPUT=input.xml
|
||||
echo "<trace name=\"HIP\"><parameters api=\"hipFree, hipMalloc, hipMemcpy\"></parameters></trace>" > test/input.xml
|
||||
export ROCP_INPUT=test/input.xml
|
||||
eval_test "tool HIP test input" ./test/MatrixTranspose MatrixTranspose_hip_input_trace
|
||||
unset ROCP_INPUT
|
||||
|
||||
@@ -160,8 +160,8 @@ export ROCP_THRS=1
|
||||
|
||||
eval_test "tool HSA test" ./test/hsa/ctrl ctrl_hsa_trace
|
||||
|
||||
echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_pool_allocate\"></parameters></trace>" > input.xml
|
||||
export ROCP_INPUT=input.xml
|
||||
echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_pool_allocate\"></parameters></trace>" > test/input.xml
|
||||
export ROCP_INPUT=test/input.xml
|
||||
eval_test "tool HSA test input" ./test/hsa/ctrl ctrl_hsa_input_trace
|
||||
unset ROCP_INPUT
|
||||
|
||||
|
||||
Reference in New Issue
Block a user