Merge branch 'master' of https://github.com/ROCm-Developer-Tools/HIP into HIP
[ROCm/clr commit: 2b77ca7d2e]
Этот коммит содержится в:
@@ -20,6 +20,7 @@ endmacro()
|
||||
# Setup version information
|
||||
#############################
|
||||
# Determine HIP_BASE_VERSION
|
||||
set(ENV{HIP_PATH} "")
|
||||
execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/hipconfig --version
|
||||
OUTPUT_VARIABLE HIP_BASE_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
@@ -48,7 +48,7 @@ endif()
|
||||
set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
|
||||
set(CPACK_GENERATOR "TGZ;DEB;RPM")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda >= 7.0")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda (>= 7.0)")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV "NO")
|
||||
|
||||
@@ -106,7 +106,7 @@ __CG_STATIC_QUALIFIER__ uint32_t size() {
|
||||
}
|
||||
|
||||
__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
|
||||
// Compute global id of the workgroup to which the current threads belongs to
|
||||
// Compute global id of the workgroup to which the current thread belongs to
|
||||
uint32_t blkIdx =
|
||||
(uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
|
||||
(hipBlockIdx_y * hipGridDim_x) +
|
||||
@@ -115,7 +115,7 @@ __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
|
||||
// Compute total number of threads being passed to reach current workgroup
|
||||
// within grid
|
||||
uint32_t num_threads_till_current_workgroup =
|
||||
(uint32_t)(blkIdx * (hipBlockIdx_x * hipBlockIdx_y * hipBlockIdx_z));
|
||||
(uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
|
||||
|
||||
// Compute thread local rank within current workgroup
|
||||
uint32_t local_thread_rank =
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
ROCMDIR=@ROCM_PATH@
|
||||
HIPDIR=$ROCMDIR/hip
|
||||
|
||||
if [ -d $ROCMDIR]
|
||||
if [ -d $ROCMDIR] ; then
|
||||
ln -s -f $ROCMDIR /opt/rocm
|
||||
fi
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -L "/opt/rocm" ]
|
||||
if [ -L "/opt/rocm" ] ; then
|
||||
unlink /opt/rocm
|
||||
fi
|
||||
|
||||
@@ -61,7 +61,7 @@ namespace hip_impl {
|
||||
if (it == impl->get_globals().end())
|
||||
return nullptr;
|
||||
else
|
||||
return it->second;
|
||||
return it->second.first;
|
||||
}
|
||||
|
||||
hsa_executable_t program_state::load_executable(const char* data,
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <amd_comgr.h>
|
||||
#include "hc.hpp"
|
||||
|
||||
#include <link.h>
|
||||
|
||||
@@ -193,7 +194,8 @@ public:
|
||||
std::tuple<
|
||||
std::once_flag,
|
||||
std::mutex,
|
||||
std::unordered_map<std::string, void*>> globals;
|
||||
// map from string to pair<global_addr, pinned_addr>
|
||||
std::unordered_map<std::string, std::pair<void*, void*>>> globals;
|
||||
|
||||
using RAII_code_reader =
|
||||
std::unique_ptr<hsa_code_object_reader_t,
|
||||
@@ -308,7 +310,7 @@ public:
|
||||
return symbol_addresses.second;
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, void*>& get_globals() {
|
||||
std::unordered_map<std::string, std::pair<void*, void*>>& get_globals() {
|
||||
std::call_once(std::get<0>(globals), [this]() {
|
||||
std::get<2>(globals).reserve(get_symbol_addresses().size());
|
||||
});
|
||||
@@ -349,30 +351,52 @@ public:
|
||||
auto& g_mutex = get_globals_mutex();
|
||||
for (auto&& x : undefined_symbols) {
|
||||
|
||||
if (g.find(x) != g.cend()) return;
|
||||
|
||||
const auto it1 = get_symbol_addresses().find(x);
|
||||
|
||||
if (it1 == get_symbol_addresses().cend()) {
|
||||
hip_throw(std::runtime_error{
|
||||
"Global symbol: " + x + " is undefined."});
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lck{g_mutex};
|
||||
hsa_status_t status;
|
||||
auto check_hsa_global_var_define_error = [&x](hsa_status_t s) {
|
||||
if (s != HSA_STATUS_SUCCESS) {
|
||||
const char* es;
|
||||
hsa_status_string(s, &es);
|
||||
hip_throw(std::runtime_error{ "Error when defining symbol " + x + " : " + es});
|
||||
}
|
||||
};
|
||||
|
||||
if (g.find(x) != g.cend()) return;
|
||||
auto retrieve_pinned_address_from_cache = [](decltype(g) g, decltype(x) x) {
|
||||
const auto& global_addr = g.find(x);
|
||||
if (global_addr != g.cend()) {
|
||||
return global_addr->second.second;
|
||||
}
|
||||
return (void*)nullptr;
|
||||
};
|
||||
|
||||
g.emplace(x, (void*)(it1->second.first));
|
||||
void* p = nullptr;
|
||||
hsa_amd_memory_lock(
|
||||
reinterpret_cast<void*>(it1->second.first),
|
||||
it1->second.second,
|
||||
nullptr, // All agents.
|
||||
0,
|
||||
&p);
|
||||
|
||||
hsa_executable_agent_global_variable_define(
|
||||
executable, agent, x.c_str(), p);
|
||||
void* p = retrieve_pinned_address_from_cache(g, x);
|
||||
if (p == nullptr) {
|
||||
std::lock_guard<std::mutex> lck{g_mutex};
|
||||
p = retrieve_pinned_address_from_cache(g, x);
|
||||
if (p == nullptr) {
|
||||
if (x == "_ZN2hc13printf_bufferE") {
|
||||
// This is the printf buffer, get the pinned address from HCC
|
||||
p = Kalmar::getContext()->getPrintfBufferPointerVA();
|
||||
}
|
||||
else {
|
||||
status = hsa_amd_memory_lock(reinterpret_cast<void*>(it1->second.first),
|
||||
it1->second.second,
|
||||
nullptr, // All agents.
|
||||
0, &p);
|
||||
check_hsa_global_var_define_error(status);
|
||||
}
|
||||
// cache the global address and its pinned address
|
||||
g.emplace(x, std::make_pair(reinterpret_cast<void*>(it1->second.first), p));
|
||||
}
|
||||
}
|
||||
status = hsa_executable_agent_global_variable_define(
|
||||
executable, agent, x.c_str(), p);
|
||||
check_hsa_global_var_define_error(status);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -30,7 +30,12 @@ THE SOFTWARE.
|
||||
__global__ void run_printf() { printf("Hello World\n"); }
|
||||
|
||||
int main() {
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
|
||||
hipDeviceSynchronize();
|
||||
int device_count = 0;
|
||||
hipGetDeviceCount(&device_count);
|
||||
for (int i = 0; i < device_count; ++i) {
|
||||
hipSetDevice(i);
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
passed();
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user