[ROCm/clr commit: 2b77ca7d2e]
Этот коммит содержится в:
Evgeny Mankov
2020-01-24 16:14:16 +03:00
родитель 8274fd3723 6fafadae75
Коммит 0f6cbe5ef2
8 изменённых файлов: 56 добавлений и 26 удалений
+1
Просмотреть файл
@@ -20,6 +20,7 @@ endmacro()
# Setup version information
#############################
# Determine HIP_BASE_VERSION
set(ENV{HIP_PATH} "")
execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/hipconfig --version
OUTPUT_VARIABLE HIP_BASE_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
+1 -1
Просмотреть файл
@@ -48,7 +48,7 @@ endif()
set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
set(CPACK_GENERATOR "TGZ;DEB;RPM")
set(CPACK_BINARY_DEB "ON")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda >= 7.0")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda (>= 7.0)")
set(CPACK_BINARY_RPM "ON")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
set(CPACK_RPM_PACKAGE_AUTOREQPROV "NO")
+2 -2
Просмотреть файл
@@ -106,7 +106,7 @@ __CG_STATIC_QUALIFIER__ uint32_t size() {
}
__CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
// Compute global id of the workgroup to which the current threads belongs to
// Compute global id of the workgroup to which the current thread belongs to
uint32_t blkIdx =
(uint32_t)((hipBlockIdx_z * hipGridDim_y * hipGridDim_x) +
(hipBlockIdx_y * hipGridDim_x) +
@@ -115,7 +115,7 @@ __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
// Compute total number of threads being passed to reach current workgroup
// within grid
uint32_t num_threads_till_current_workgroup =
(uint32_t)(blkIdx * (hipBlockIdx_x * hipBlockIdx_y * hipBlockIdx_z));
(uint32_t)(blkIdx * (hipBlockDim_x * hipBlockDim_y * hipBlockDim_z));
// Compute thread local rank within current workgroup
uint32_t local_thread_rank =
+1 -1
Просмотреть файл
@@ -3,6 +3,6 @@
ROCMDIR=@ROCM_PATH@
HIPDIR=$ROCMDIR/hip
if [ -d $ROCMDIR]
if [ -d $ROCMDIR] ; then
ln -s -f $ROCMDIR /opt/rocm
fi
+1 -1
Просмотреть файл
@@ -1,5 +1,5 @@
#!/bin/bash
if [ -L "/opt/rocm" ]
if [ -L "/opt/rocm" ] ; then
unlink /opt/rocm
fi
+1 -1
Просмотреть файл
@@ -61,7 +61,7 @@ namespace hip_impl {
if (it == impl->get_globals().end())
return nullptr;
else
return it->second;
return it->second.first;
}
hsa_executable_t program_state::load_executable(const char* data,
+42 -18
Просмотреть файл
@@ -18,6 +18,7 @@
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <amd_comgr.h>
#include "hc.hpp"
#include <link.h>
@@ -193,7 +194,8 @@ public:
std::tuple<
std::once_flag,
std::mutex,
std::unordered_map<std::string, void*>> globals;
// map from string to pair<global_addr, pinned_addr>
std::unordered_map<std::string, std::pair<void*, void*>>> globals;
using RAII_code_reader =
std::unique_ptr<hsa_code_object_reader_t,
@@ -308,7 +310,7 @@ public:
return symbol_addresses.second;
}
std::unordered_map<std::string, void*>& get_globals() {
std::unordered_map<std::string, std::pair<void*, void*>>& get_globals() {
std::call_once(std::get<0>(globals), [this]() {
std::get<2>(globals).reserve(get_symbol_addresses().size());
});
@@ -349,30 +351,52 @@ public:
auto& g_mutex = get_globals_mutex();
for (auto&& x : undefined_symbols) {
if (g.find(x) != g.cend()) return;
const auto it1 = get_symbol_addresses().find(x);
if (it1 == get_symbol_addresses().cend()) {
hip_throw(std::runtime_error{
"Global symbol: " + x + " is undefined."});
}
std::lock_guard<std::mutex> lck{g_mutex};
hsa_status_t status;
auto check_hsa_global_var_define_error = [&x](hsa_status_t s) {
if (s != HSA_STATUS_SUCCESS) {
const char* es;
hsa_status_string(s, &es);
hip_throw(std::runtime_error{ "Error when defining symbol " + x + " : " + es});
}
};
if (g.find(x) != g.cend()) return;
auto retrieve_pinned_address_from_cache = [](decltype(g) g, decltype(x) x) {
const auto& global_addr = g.find(x);
if (global_addr != g.cend()) {
return global_addr->second.second;
}
return (void*)nullptr;
};
g.emplace(x, (void*)(it1->second.first));
void* p = nullptr;
hsa_amd_memory_lock(
reinterpret_cast<void*>(it1->second.first),
it1->second.second,
nullptr, // All agents.
0,
&p);
hsa_executable_agent_global_variable_define(
executable, agent, x.c_str(), p);
void* p = retrieve_pinned_address_from_cache(g, x);
if (p == nullptr) {
std::lock_guard<std::mutex> lck{g_mutex};
p = retrieve_pinned_address_from_cache(g, x);
if (p == nullptr) {
if (x == "_ZN2hc13printf_bufferE") {
// This is the printf buffer, get the pinned address from HCC
p = Kalmar::getContext()->getPrintfBufferPointerVA();
}
else {
status = hsa_amd_memory_lock(reinterpret_cast<void*>(it1->second.first),
it1->second.second,
nullptr, // All agents.
0, &p);
check_hsa_global_var_define_error(status);
}
// cache the global address and its pinned address
g.emplace(x, std::make_pair(reinterpret_cast<void*>(it1->second.first), p));
}
}
status = hsa_executable_agent_global_variable_define(
executable, agent, x.c_str(), p);
check_hsa_global_var_define_error(status);
}
}
+7 -2
Просмотреть файл
@@ -30,7 +30,12 @@ THE SOFTWARE.
__global__ void run_printf() { printf("Hello World\n"); }
int main() {
hipLaunchKernelGGL(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
hipDeviceSynchronize();
int device_count = 0;
hipGetDeviceCount(&device_count);
for (int i = 0; i < device_count; ++i) {
hipSetDevice(i);
hipLaunchKernelGGL(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
hipDeviceSynchronize();
}
passed();
}