2016-08-17 10:36:28 -05:00
|
|
|
/*
|
2017-03-31 12:11:34 -05:00
|
|
|
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
2016-10-15 22:55:22 +05:30
|
|
|
|
2016-08-17 10:36:28 -05:00
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
2016-10-15 22:55:22 +05:30
|
|
|
|
2016-08-17 10:36:28 -05:00
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
2016-10-15 22:55:22 +05:30
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
2016-08-17 10:36:28 -05:00
|
|
|
THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2017-12-03 23:09:06 +00:00
|
|
|
#include "hip/hip_runtime.h"
|
2019-03-06 14:01:44 +02:00
|
|
|
#include "hip/hcc_detail/elfio/elfio.hpp"
|
|
|
|
|
#include "hip/hcc_detail/hsa_helpers.hpp"
|
2017-12-03 23:09:06 +00:00
|
|
|
#include "hip/hcc_detail/program_state.hpp"
|
|
|
|
|
#include "hip_hcc_internal.h"
|
|
|
|
|
#include "trace_helper.h"
|
|
|
|
|
|
2018-05-11 03:35:10 +01:00
|
|
|
#include <hsa/amd_hsa_kernel_code.h>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <hsa/hsa.h>
|
|
|
|
|
#include <hsa/hsa_ext_amd.h>
|
|
|
|
|
|
2018-05-11 03:35:10 +01:00
|
|
|
#include <algorithm>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <cassert>
|
2017-10-09 13:27:11 +01:00
|
|
|
#include <cstdint>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <cstdio>
|
|
|
|
|
#include <cstdlib>
|
|
|
|
|
#include <fstream>
|
|
|
|
|
#include <map>
|
2017-10-09 13:27:11 +01:00
|
|
|
#include <memory>
|
|
|
|
|
#include <mutex>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <sstream>
|
|
|
|
|
#include <stdexcept>
|
2017-10-09 13:27:11 +01:00
|
|
|
#include <string>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <tuple>
|
2017-11-03 01:44:48 +00:00
|
|
|
#include <unordered_map>
|
2017-12-03 23:09:06 +00:00
|
|
|
#include <utility>
|
2017-10-09 13:27:11 +01:00
|
|
|
#include <vector>
|
2017-12-28 16:15:45 +05:30
|
|
|
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
|
2018-11-13 22:28:00 -05:00
|
|
|
#include "hip_fatbin.h"
|
2018-03-12 11:29:03 +05:30
|
|
|
// TODO Use Pool APIs from HCC to get memory regions.
|
2016-08-18 11:26:55 -05:00
|
|
|
|
2017-12-03 23:09:06 +00:00
|
|
|
using namespace ELFIO;
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
2019-01-08 17:18:06 +00:00
|
|
|
// calculate MD5 checksum
|
|
|
|
|
inline std::string checksum(size_t size, const char *source) {
|
|
|
|
|
// FNV-1a hashing, 64-bit version
|
|
|
|
|
const uint64_t FNV_prime = 0x100000001b3;
|
|
|
|
|
const uint64_t FNV_basis = 0xcbf29ce484222325;
|
|
|
|
|
uint64_t hash = FNV_basis;
|
|
|
|
|
|
|
|
|
|
const char *str = static_cast<const char *>(source);
|
|
|
|
|
for (auto i = 0; i < size; ++i) {
|
|
|
|
|
hash ^= *str++;
|
|
|
|
|
hash *= FNV_prime;
|
|
|
|
|
}
|
|
|
|
|
return std::to_string(hash);
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-17 13:11:34 -05:00
|
|
|
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
|
2018-03-12 11:29:03 +05:30
|
|
|
assert(Align != 0u && "Align can't be 0.");
|
|
|
|
|
Skew %= Align;
|
|
|
|
|
return (Value + Align - 1 - Skew) / Align * Align + Skew;
|
2017-03-17 13:11:34 -05:00
|
|
|
}
|
|
|
|
|
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
struct ihipKernArgInfo {
|
|
|
|
|
vector<uint32_t> Size;
|
|
|
|
|
vector<uint32_t> Align;
|
|
|
|
|
vector<string> ArgType;
|
|
|
|
|
vector<string> ArgName;
|
|
|
|
|
uint32_t totalSize;
|
2017-03-17 13:11:34 -05:00
|
|
|
};
|
|
|
|
|
|
2017-12-03 23:09:06 +00:00
|
|
|
map<string, ihipKernArgInfo> kernelArguments;
|
2017-02-09 17:22:55 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
struct ihipModuleSymbol_t {
|
2018-05-11 03:35:10 +01:00
|
|
|
uint64_t _object{}; // The kernel object.
|
|
|
|
|
amd_kernel_code_t const* _header{};
|
2018-03-12 11:29:03 +05:30
|
|
|
string _name; // TODO - review for performance cost. Name is just used for debug.
|
2017-02-09 17:22:55 -06:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <>
|
2018-03-12 11:29:03 +05:30
|
|
|
string ToString(hipFunction_t v) {
|
2017-02-09 17:22:55 -06:00
|
|
|
std::ostringstream ss;
|
|
|
|
|
ss << "0x" << std::hex << v->_object;
|
|
|
|
|
return ss.str();
|
|
|
|
|
};
|
|
|
|
|
|
2018-08-26 17:34:26 -05:00
|
|
|
std::string& FunctionSymbol(hipFunction_t f) { return f->_name; };
|
2017-02-09 17:22:55 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
#define CHECK_HSA(hsaStatus, hipStatus) \
|
|
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) { \
|
|
|
|
|
return hipStatus; \
|
|
|
|
|
}
|
2016-12-17 07:21:15 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
#define CHECKLOG_HSA(hsaStatus, hipStatus) \
|
|
|
|
|
if (hsaStatus != HSA_STATUS_SUCCESS) { \
|
|
|
|
|
return ihipLogStatus(hipStatus); \
|
|
|
|
|
}
|
2016-12-17 07:19:22 -06:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleUnload(hipModule_t hmod) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleUnload, hmod);
|
2017-10-30 20:18:41 +00:00
|
|
|
|
2016-12-06 16:05:46 +00:00
|
|
|
// TODO - improve this synchronization so it is thread-safe.
|
|
|
|
|
// Currently we want for all inflight activity to complete, but don't prevent another
|
|
|
|
|
// thread from launching new kernels before we finish this operation.
|
|
|
|
|
ihipSynchronize();
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
delete hmod; // The ihipModule_t dtor will clean everything up.
|
2017-12-03 23:09:06 +00:00
|
|
|
hmod = nullptr;
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2017-12-03 23:09:06 +00:00
|
|
|
return ihipLogStatus(hipSuccess);
|
2016-08-18 11:26:55 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
|
|
|
|
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
|
|
|
|
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
|
|
|
|
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
|
|
|
|
hipStream_t hStream, void** kernelParams, void** extra,
|
2019-03-06 12:55:39 +05:30
|
|
|
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
2016-08-18 11:26:55 -05:00
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
|
|
|
hipError_t ret = hipSuccess;
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (ctx == nullptr) {
|
2016-08-18 11:26:55 -05:00
|
|
|
ret = hipErrorInvalidDevice;
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
} else {
|
2016-08-18 11:26:55 -05:00
|
|
|
int deviceId = ctx->getDevice()->_deviceId;
|
2018-03-12 11:29:03 +05:30
|
|
|
ihipDevice_t* currentDevice = ihipGetDevice(deviceId);
|
2016-08-18 11:26:55 -05:00
|
|
|
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
void* config[5] = {0};
|
2016-10-14 23:46:29 -05:00
|
|
|
size_t kernArgSize;
|
2016-08-18 11:26:55 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (kernelParams != NULL) {
|
|
|
|
|
std::string name = f->_name;
|
|
|
|
|
struct ihipKernArgInfo pl = kernelArguments[name];
|
|
|
|
|
char* argBuf = (char*)malloc(pl.totalSize);
|
|
|
|
|
memset(argBuf, 0, pl.totalSize);
|
|
|
|
|
int index = 0;
|
|
|
|
|
for (int i = 0; i < pl.Size.size(); i++) {
|
|
|
|
|
memcpy(argBuf + index, kernelParams[i], pl.Size[i]);
|
|
|
|
|
index += pl.Align[i];
|
|
|
|
|
}
|
|
|
|
|
config[1] = (void*)argBuf;
|
|
|
|
|
kernArgSize = pl.totalSize;
|
|
|
|
|
} else if (extra != NULL) {
|
|
|
|
|
memcpy(config, extra, sizeof(size_t) * 5);
|
|
|
|
|
if (config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER &&
|
|
|
|
|
config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END) {
|
2016-10-14 23:46:29 -05:00
|
|
|
kernArgSize = *(size_t*)(config[3]);
|
2016-09-02 15:49:22 -05:00
|
|
|
} else {
|
2017-10-30 20:18:41 +00:00
|
|
|
return hipErrorNotInitialized;
|
2016-08-18 11:26:55 -05:00
|
|
|
}
|
2017-10-09 13:27:11 +01:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
} else {
|
2017-10-30 20:18:41 +00:00
|
|
|
return hipErrorInvalidValue;
|
2016-08-18 11:26:55 -05:00
|
|
|
}
|
2016-09-07 12:57:18 -05:00
|
|
|
|
|
|
|
|
|
2016-09-02 15:49:22 -05:00
|
|
|
/*
|
|
|
|
|
Kernel argument preparation.
|
|
|
|
|
*/
|
2017-02-09 17:22:55 -06:00
|
|
|
grid_launch_parm lp;
|
2018-03-12 11:29:03 +05:30
|
|
|
lp.dynamic_group_mem_bytes =
|
|
|
|
|
sharedMemBytes; // TODO - this should be part of preLaunchKernel.
|
|
|
|
|
hStream = ihipPreLaunchKernel(
|
|
|
|
|
hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ),
|
|
|
|
|
dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str());
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2016-12-05 16:55:26 +05:30
|
|
|
|
2016-10-14 23:46:29 -05:00
|
|
|
hsa_kernel_dispatch_packet_t aql;
|
|
|
|
|
|
|
|
|
|
memset(&aql, 0, sizeof(aql));
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
// aql.completion_signal._handle = 0;
|
|
|
|
|
// aql.kernarg_address = 0;
|
2016-10-14 23:46:29 -05:00
|
|
|
|
2017-03-31 12:11:34 -05:00
|
|
|
aql.workgroup_size_x = localWorkSizeX;
|
|
|
|
|
aql.workgroup_size_y = localWorkSizeY;
|
|
|
|
|
aql.workgroup_size_z = localWorkSizeZ;
|
|
|
|
|
aql.grid_size_x = globalWorkSizeX;
|
|
|
|
|
aql.grid_size_y = globalWorkSizeY;
|
|
|
|
|
aql.grid_size_z = globalWorkSizeZ;
|
2018-05-11 03:35:10 +01:00
|
|
|
aql.group_segment_size =
|
|
|
|
|
f->_header->workgroup_group_segment_byte_size + sharedMemBytes;
|
|
|
|
|
aql.private_segment_size =
|
|
|
|
|
f->_header->workitem_private_segment_byte_size;
|
2017-02-09 17:22:55 -06:00
|
|
|
aql.kernel_object = f->_object;
|
2016-12-05 20:21:33 -06:00
|
|
|
aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
2018-03-12 11:29:03 +05:30
|
|
|
aql.header =
|
2019-03-06 12:55:39 +05:30
|
|
|
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE);
|
|
|
|
|
if((flags & 0x1)== 0 ) {
|
|
|
|
|
//in_order
|
|
|
|
|
aql.header |= (1 << HSA_PACKET_HEADER_BARRIER);
|
|
|
|
|
}
|
2016-10-14 23:46:29 -05:00
|
|
|
|
2017-01-25 21:50:52 -06:00
|
|
|
if (HCC_OPT_FLUSH) {
|
|
|
|
|
aql.header |= (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
|
|
|
|
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
|
|
|
|
} else {
|
|
|
|
|
aql.header |= (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
|
|
|
|
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
|
|
|
|
};
|
|
|
|
|
|
2017-04-06 23:55:15 +00:00
|
|
|
|
|
|
|
|
hc::completion_future cf;
|
|
|
|
|
|
2017-10-09 13:27:11 +01:00
|
|
|
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
|
2018-03-12 11:29:03 +05:30
|
|
|
(startEvent || stopEvent) ? &cf : nullptr
|
2017-08-08 10:15:32 +05:30
|
|
|
#if (__hcc_workweek__ > 17312)
|
2018-03-12 11:29:03 +05:30
|
|
|
,
|
|
|
|
|
f->_name.c_str()
|
2017-07-27 23:00:58 -05:00
|
|
|
#endif
|
2018-03-12 11:29:03 +05:30
|
|
|
);
|
2017-04-06 23:55:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
if (startEvent) {
|
2017-05-30 21:54:33 -05:00
|
|
|
startEvent->attachToCompletionFuture(&cf, hStream, hipEventTypeStartCommand);
|
2017-04-06 23:55:15 +00:00
|
|
|
}
|
|
|
|
|
if (stopEvent) {
|
2018-03-12 11:29:03 +05:30
|
|
|
stopEvent->attachToCompletionFuture(&cf, hStream, hipEventTypeStopCommand);
|
2017-04-06 23:55:15 +00:00
|
|
|
}
|
|
|
|
|
|
2016-12-05 16:55:26 +05:30
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (kernelParams != NULL) {
|
|
|
|
|
free(config[1]);
|
2017-03-17 13:11:34 -05:00
|
|
|
}
|
2017-02-10 13:32:13 -06:00
|
|
|
ihipPostLaunchKernel(f->_name.c_str(), hStream, lp);
|
2016-08-16 14:36:25 -05:00
|
|
|
}
|
2016-08-19 08:49:34 -05:00
|
|
|
|
2017-03-31 12:11:34 -05:00
|
|
|
return ret;
|
2016-08-16 14:36:25 -05:00
|
|
|
}
|
2016-08-25 14:16:53 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY,
|
|
|
|
|
uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY,
|
|
|
|
|
uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
|
|
|
|
|
void** kernelParams, void** extra) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
|
2018-03-12 11:29:03 +05:30
|
|
|
hStream, kernelParams, extra);
|
|
|
|
|
return ihipLogStatus(ihipModuleLaunchKernel(
|
|
|
|
|
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
|
2019-03-06 12:55:39 +05:30
|
|
|
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
|
2017-03-31 12:11:34 -05:00
|
|
|
}
|
|
|
|
|
|
2019-03-06 12:55:39 +05:30
|
|
|
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
|
|
|
|
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
|
|
|
|
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
|
|
|
|
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
|
|
|
|
hipStream_t hStream, void** kernelParams, void** extra,
|
|
|
|
|
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
|
|
|
|
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
|
|
|
|
|
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
|
|
|
|
return ihipLogStatus(ihipModuleLaunchKernel(
|
|
|
|
|
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
|
|
|
|
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
|
|
|
|
}
|
2017-03-31 12:11:34 -05:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
|
|
|
|
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
|
|
|
|
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
|
|
|
|
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
|
|
|
|
hipStream_t hStream, void** kernelParams, void** extra,
|
|
|
|
|
hipEvent_t startEvent, hipEvent_t stopEvent) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
|
2018-03-12 11:29:03 +05:30
|
|
|
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
|
|
|
|
return ihipLogStatus(ihipModuleLaunchKernel(
|
|
|
|
|
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
2019-03-06 12:55:39 +05:30
|
|
|
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
|
2017-03-31 12:11:34 -05:00
|
|
|
}
|
2016-08-25 14:16:53 -05:00
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
namespace hip_impl {
|
|
|
|
|
hsa_executable_t executable_for(hipModule_t hmod) {
|
|
|
|
|
return hmod->executable;
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-06 18:27:28 +00:00
|
|
|
const char* hash_for(hipModule_t hmod) {
|
|
|
|
|
return hmod->hash.c_str();
|
2019-03-06 14:01:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_agent_t this_agent() {
|
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
|
|
|
|
|
|
|
|
if (!ctx) throw runtime_error{"No active HIP context."};
|
|
|
|
|
|
|
|
|
|
auto device = ctx->getDevice();
|
|
|
|
|
|
|
|
|
|
if (!device) throw runtime_error{"No device available for HIP."};
|
2017-11-03 01:44:48 +00:00
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId);
|
|
|
|
|
|
|
|
|
|
if (!currentDevice) throw runtime_error{"No active device for HIP."};
|
|
|
|
|
|
|
|
|
|
return currentDevice->_hsaAgent;
|
|
|
|
|
}
|
|
|
|
|
} // Namespace hip_impl.
|
|
|
|
|
|
|
|
|
|
namespace {
|
2018-11-13 07:01:17 +05:30
|
|
|
inline void track(const Agent_global& x, hsa_agent_t agent) {
|
2018-03-12 11:29:03 +05:30
|
|
|
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(),
|
|
|
|
|
x.address, x.byte_cnt);
|
2017-11-03 01:44:48 +00:00
|
|
|
|
2018-11-13 07:01:17 +05:30
|
|
|
int deviceIndex =0;
|
|
|
|
|
for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) {
|
|
|
|
|
if(g_allAgents[deviceIndex] == agent)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
auto device = ihipGetDevice(deviceIndex - 1);
|
2018-03-12 11:29:03 +05:30
|
|
|
hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true,
|
|
|
|
|
false);
|
|
|
|
|
hc::am_memtracker_add(x.address, ptr_info);
|
2018-10-31 03:22:38 +05:30
|
|
|
#if USE_APP_PTR_FOR_CTX
|
|
|
|
|
hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
|
|
|
|
|
#else
|
2018-03-12 11:29:03 +05:30
|
|
|
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
|
2018-10-31 03:22:38 +05:30
|
|
|
#endif
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
}
|
2017-11-03 01:44:48 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
template <typename Container = vector<Agent_global>>
|
2018-11-13 07:01:17 +05:30
|
|
|
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent,
|
2018-03-12 11:29:03 +05:30
|
|
|
hsa_executable_symbol_t x, void* out) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
assert(out);
|
|
|
|
|
|
|
|
|
|
hsa_symbol_kind_t t = {};
|
|
|
|
|
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t);
|
2017-11-03 01:44:48 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (t == HSA_SYMBOL_KIND_VARIABLE) {
|
|
|
|
|
static_cast<Container*>(out)->push_back(Agent_global{name(x), address(x), size(x)});
|
|
|
|
|
|
2018-11-13 07:01:17 +05:30
|
|
|
track(static_cast<Container*>(out)->back(),agent);
|
2017-11-03 01:44:48 +00:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
pair<const char*, hsa_executable_symbol_t> r{kname, {}};
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hsa_executable_iterate_agent_symbols(
|
|
|
|
|
executable, this_agent(),
|
|
|
|
|
[](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* s) {
|
|
|
|
|
auto p = static_cast<pair<const char*, hsa_executable_symbol_t>*>(s);
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (type(x) != HSA_SYMBOL_KIND_KERNEL) {
|
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
if (name(x) != p->first) return HSA_STATUS_SUCCESS;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
p->second = x;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
return HSA_STATUS_INFO_BREAK;
|
|
|
|
|
},
|
|
|
|
|
&r);
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
return r.second;
|
|
|
|
|
}
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
string read_elf_file_as_string(const void* file) {
|
|
|
|
|
// Precondition: file points to an ELF image that was BITWISE loaded
|
2018-03-12 11:29:03 +05:30
|
|
|
// into process accessible memory, and not one loaded by
|
|
|
|
|
// the loader. This is because in the latter case
|
|
|
|
|
// alignment may differ, which will break the size
|
|
|
|
|
// computation.
|
|
|
|
|
// the image is Elf64, and matches endianness i.e. it is
|
|
|
|
|
// Little Endian.
|
|
|
|
|
if (!file) return {};
|
|
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
auto h = static_cast<const ELFIO::Elf64_Ehdr*>(file);
|
2018-03-12 11:29:03 +05:30
|
|
|
auto s = static_cast<const char*>(file);
|
|
|
|
|
// This assumes the common case of SHT being the last part of the ELF.
|
2019-03-06 14:01:44 +02:00
|
|
|
auto sz =
|
|
|
|
|
sizeof(ELFIO::Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum;
|
2018-03-12 11:29:03 +05:30
|
|
|
|
|
|
|
|
return string{s, s + sz};
|
|
|
|
|
}
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
string code_object_blob_for_agent(const void* maybe_bundled_code, hsa_agent_t agent) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!maybe_bundled_code) return {};
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
Bundled_code_header tmp{maybe_bundled_code};
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!valid(tmp)) return {};
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
const auto agent_isa = isa(agent);
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
const auto it = find_if(bundles(tmp).cbegin(), bundles(tmp).cend(), [=](const Bundled_code& x) {
|
|
|
|
|
return agent_isa == triple_to_hsa_isa(x.triple);
|
|
|
|
|
;
|
|
|
|
|
});
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (it == bundles(tmp).cend()) return {};
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
return string{it->blob.cbegin(), it->blob.cend()};
|
|
|
|
|
}
|
2019-03-06 14:01:44 +02:00
|
|
|
} // Unnamed namespace.
|
|
|
|
|
|
|
|
|
|
namespace hip_impl {
|
|
|
|
|
vector<Agent_global> read_agent_globals(hsa_agent_t agent,
|
|
|
|
|
hsa_executable_t executable) {
|
|
|
|
|
vector<Agent_global> r;
|
|
|
|
|
|
|
|
|
|
hsa_executable_iterate_agent_symbols(
|
|
|
|
|
executable, agent, copy_agent_global_variables, &r);
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
} // Namespace hip_impl.
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!func || !name) return hipErrorInvalidValue;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
|
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!ctx) return hipErrorInvalidContext;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
|
|
|
|
*func = new ihipModuleSymbol_t;
|
|
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!*func) return hipErrorInvalidValue;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
|
|
|
|
auto kernel = find_kernel_by_name(hmod->executable, name);
|
|
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (kernel.handle == 0u) return hipErrorNotFound;
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-05-11 03:35:10 +01:00
|
|
|
// TODO: refactor the whole ihipThisThat, which is a mess and yields the
|
|
|
|
|
// below, due to hipFunction_t being a pointer to ihipModuleSymbol_t.
|
|
|
|
|
func[0][0] = *static_cast<hipFunction_t>(
|
|
|
|
|
Kernel_descriptor{kernel_object(kernel), name});
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
return hipSuccess;
|
2017-12-03 23:09:06 +00:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
|
2017-12-03 23:09:06 +00:00
|
|
|
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name));
|
2017-11-21 02:40:34 +00:00
|
|
|
}
|
|
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
namespace {
|
|
|
|
|
hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) {
|
|
|
|
|
hipFuncAttributes r{};
|
|
|
|
|
|
|
|
|
|
hipDeviceProp_t prop{};
|
|
|
|
|
hipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId);
|
|
|
|
|
// TODO: at the moment there is no way to query the count of registers
|
|
|
|
|
// available per CU, therefore we hardcode it to 64 KiRegisters.
|
|
|
|
|
prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024;
|
|
|
|
|
|
|
|
|
|
r.localSizeBytes = header.workitem_private_segment_byte_size;
|
|
|
|
|
r.sharedSizeBytes = header.workgroup_group_segment_byte_size;
|
|
|
|
|
r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes;
|
|
|
|
|
r.numRegs = header.workitem_vgpr_count;
|
|
|
|
|
r.maxThreadsPerBlock = r.numRegs ?
|
|
|
|
|
std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) :
|
|
|
|
|
prop.maxThreadsPerBlock;
|
|
|
|
|
r.binaryVersion =
|
|
|
|
|
header.amd_machine_version_major * 10 +
|
|
|
|
|
header.amd_machine_version_minor;
|
|
|
|
|
r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0.
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-11-06 09:54:34 +01:00
|
|
|
return r;
|
2016-08-25 14:16:53 -05:00
|
|
|
}
|
2019-03-06 14:01:44 +02:00
|
|
|
} // Unnamed namespace.
|
2018-05-11 03:35:10 +01:00
|
|
|
|
|
|
|
|
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
|
|
|
|
{
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
|
|
|
|
|
2018-05-14 17:15:36 +01:00
|
|
|
if (!attr) return hipErrorInvalidValue;
|
2018-05-11 03:35:10 +01:00
|
|
|
if (!func) return hipErrorInvalidDeviceFunction;
|
|
|
|
|
|
|
|
|
|
const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
|
|
|
|
|
|
|
|
|
|
if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
|
|
|
|
|
|
|
|
|
|
auto agent = this_agent();
|
|
|
|
|
const auto it1 = find_if(
|
|
|
|
|
it0->second.cbegin(),
|
|
|
|
|
it0->second.cend(),
|
|
|
|
|
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
|
|
|
|
|
return x.first == agent;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
|
|
|
|
|
|
|
|
|
|
const auto header = static_cast<hipFunction_t>(it1->second)->_header;
|
|
|
|
|
|
|
|
|
|
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};
|
|
|
|
|
|
|
|
|
|
*attr = make_function_attributes(*header);
|
|
|
|
|
|
|
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!module) return hipErrorInvalidValue;
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
*module = new ihipModule_t;
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
auto ctx = ihipGetTlsDefaultCtx();
|
|
|
|
|
if (!ctx) return hipErrorInvalidContext;
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-11-13 22:28:00 -05:00
|
|
|
// try extracting code object from image as fatbin.
|
|
|
|
|
char name[64] = {};
|
|
|
|
|
hsa_agent_get_info(this_agent(), HSA_AGENT_INFO_NAME, name);
|
|
|
|
|
if (auto *code_obj = __hipExtractCodeObjectFromFatBinary(image, name))
|
|
|
|
|
image = code_obj;
|
|
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr,
|
|
|
|
|
&(*module)->executable);
|
|
|
|
|
|
|
|
|
|
auto tmp = code_object_blob_for_agent(image, this_agent());
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2019-01-08 17:18:06 +00:00
|
|
|
auto content = tmp.empty() ? read_elf_file_as_string(image) : tmp;
|
|
|
|
|
|
2019-03-06 14:01:44 +02:00
|
|
|
(*module)->executable = load_executable(content, (*module)->executable,
|
|
|
|
|
this_agent());
|
2019-01-08 17:18:06 +00:00
|
|
|
|
|
|
|
|
// compute the hash of the code object
|
|
|
|
|
(*module)->hash = checksum(content.length(), content.data());
|
2018-05-11 03:35:10 +01:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
return (*module)->executable.handle ? hipSuccess : hipErrorUnknown;
|
2017-12-08 04:22:57 +00:00
|
|
|
}
|
|
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleLoadData, module, image);
|
2018-04-16 15:35:04 +05:30
|
|
|
return ihipLogStatus(ihipModuleLoadData(module,image));
|
|
|
|
|
}
|
2016-08-25 14:16:53 -05:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleLoad, module, fname);
|
2016-08-25 14:16:53 -05:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
2016-08-25 14:16:53 -05:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
ifstream file{fname};
|
2016-12-17 07:19:22 -06:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
2017-12-08 04:22:57 +00:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
vector<char> tmp{istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
2016-12-17 07:19:22 -06:00
|
|
|
|
2018-04-16 15:35:04 +05:30
|
|
|
return ihipLogStatus(ihipModuleLoadData(module, tmp.data()));
|
2016-08-25 14:16:53 -05:00
|
|
|
}
|
2017-05-19 17:22:14 +03:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
|
|
|
|
|
hipJitOption* options, void** optionValues) {
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleLoadDataEx, module, image, numOptions, options, optionValues);
|
2018-04-16 15:35:04 +05:30
|
|
|
return ihipLogStatus(ihipModuleLoadData(module, image));
|
2017-05-19 17:22:14 +03:00
|
|
|
}
|
2017-11-09 22:10:55 +05:30
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
|
2019-03-06 14:01:44 +02:00
|
|
|
using namespace hip_impl;
|
|
|
|
|
|
2018-11-08 08:36:50 -06:00
|
|
|
HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name);
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2017-11-21 21:19:06 +05:30
|
|
|
hipError_t ret = hipErrorNotFound;
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!texRef) return ihipLogStatus(hipErrorInvalidValue);
|
2017-12-03 23:09:06 +00:00
|
|
|
|
2018-03-12 11:29:03 +05:30
|
|
|
if (!hmod || !name) return ihipLogStatus(hipErrorNotInitialized);
|
2017-12-03 23:09:06 +00:00
|
|
|
|
|
|
|
|
const auto it = globals().find(name);
|
|
|
|
|
if (it == globals().end()) return ihipLogStatus(hipErrorInvalidValue);
|
|
|
|
|
|
2018-01-30 18:06:31 +05:30
|
|
|
*texRef = reinterpret_cast<textureReference*>(it->second);
|
2017-12-03 23:09:06 +00:00
|
|
|
return ihipLogStatus(hipSuccess);
|
2017-11-09 22:10:55 +05:30
|
|
|
}
|