Files
rocm-systems/runtime/hsa-runtime/loader/loaders.cpp
T
Sean Keely 7333c77e22 Squash merge of cfreehil/amd-temp-gfx90a onto amd-staging.
Includes some workarounds and HMM.
Conflicts:
	opensrc/hsa-runtime/core/runtime/amd_topology.cpp
	opensrc/hsa-runtime/core/util/flag.h

Change-Id: I22976f07964a43dbb228a6231777dbd599112b8d
2021-04-02 02:10:15 -04:00

285 строки
9.6 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include <cstring>
#include <cassert>
#include "loaders.hpp"
namespace rocr {
namespace amd {
namespace hsa {
namespace loader {
// Allocates `size` bytes at an address that honours `alignment`.
// On Windows this forwards to _aligned_malloc. Elsewhere the alignment is
// first raised to at least sizeof(void*) and posix_memalign is used.
// Returns NULL when the allocation does not succeed.
static inline void*
alignedMalloc(size_t size, size_t alignment)
{
#if defined(_WIN32)
  return ::_aligned_malloc(size, alignment);
#else
  // posix_memalign rejects alignments smaller than a pointer.
  const size_t effectiveAlignment = (std::max)(alignment, sizeof(void*));
  void* block = NULL;
  const int status = ::posix_memalign(&block, effectiveAlignment, size);
  return (status == 0) ? block : NULL;
#endif
}
// Releases `ptr` through the routine that matches the allocator used by
// alignedMalloc: _aligned_free on Windows, free everywhere else.
static inline void
alignedFree(void *ptr)
{
#if defined(_WIN32)
  ::_aligned_free(ptr);
#else
  ::free(ptr);
#endif
}
// Builds an offline loader context that writes its trace to std::cout and
// gives every ISA member a handle. Each handle is the hexadecimal literal
// that spells the gfx number (gfx803 -> 0x803); `invalid` gets handle 0.
OfflineLoaderContext::OfflineLoaderContext()
  : out(std::cout)
{
  invalid.handle = 0;
  gfx700.handle = 0x700;
  gfx701.handle = 0x701;
  gfx702.handle = 0x702;
  // IsaFromName() returns gfx703/gfx704/gfx705 for the 7:0:3..7:0:5 names, so
  // they must be initialised here as well; otherwise those lookups would hand
  // back an indeterminate handle.
  gfx703.handle = 0x703;
  gfx704.handle = 0x704;
  gfx705.handle = 0x705;
  gfx801.handle = 0x801;
  gfx802.handle = 0x802;
  gfx803.handle = 0x803;
  gfx805.handle = 0x805;
  gfx810.handle = 0x810;
  gfx900.handle = 0x900;
  gfx902.handle = 0x902;
  gfx904.handle = 0x904;
  gfx906.handle = 0x906;
  gfx908.handle = 0x908;
  gfx90a.handle = 0x90a;
  gfx1010.handle = 0x1010;
  gfx1011.handle = 0x1011;
  gfx1012.handle = 0x1012;
  gfx1030.handle = 0x1030;
  gfx1031.handle = 0x1031;
  gfx1032.handle = 0x1032;
  gfx1033.handle = 0x1033;
}
// Translates an ISA name of the form "AMD:AMDGPU:<major>:<minor>:<stepping>"
// into the matching ISA member of this context. Several names share one
// member (for example 8:0:0 and 8:0:2 both yield gfx802). An unrecognised
// name trips an assertion and, when assertions are disabled, yields `invalid`.
hsa_isa_t OfflineLoaderContext::IsaFromName(const char *name)
{
  const std::string isaName(name);

  // gfx7
  if (isaName == "AMD:AMDGPU:7:0:0") { return gfx700; }
  if (isaName == "AMD:AMDGPU:7:0:1") { return gfx701; }
  if (isaName == "AMD:AMDGPU:7:0:2") { return gfx702; }
  if (isaName == "AMD:AMDGPU:7:0:3") { return gfx703; }
  if (isaName == "AMD:AMDGPU:7:0:4") { return gfx704; }
  if (isaName == "AMD:AMDGPU:7:0:5") { return gfx705; }

  // gfx8
  if (isaName == "AMD:AMDGPU:8:0:1") { return gfx801; }
  if (isaName == "AMD:AMDGPU:8:0:0" || isaName == "AMD:AMDGPU:8:0:2") { return gfx802; }
  if (isaName == "AMD:AMDGPU:8:0:3" || isaName == "AMD:AMDGPU:8:0:4") { return gfx803; }
  if (isaName == "AMD:AMDGPU:8:0:5") { return gfx805; }
  if (isaName == "AMD:AMDGPU:8:1:0") { return gfx810; }

  // gfx9
  if (isaName == "AMD:AMDGPU:9:0:0" || isaName == "AMD:AMDGPU:9:0:1") { return gfx900; }
  if (isaName == "AMD:AMDGPU:9:0:2" || isaName == "AMD:AMDGPU:9:0:3") { return gfx902; }
  if (isaName == "AMD:AMDGPU:9:0:4" || isaName == "AMD:AMDGPU:9:0:5") { return gfx904; }
  if (isaName == "AMD:AMDGPU:9:0:6" || isaName == "AMD:AMDGPU:9:0:7") { return gfx906; }
  if (isaName == "AMD:AMDGPU:9:0:8") { return gfx908; }
  if (isaName == "AMD:AMDGPU:9:0:A") { return gfx90a; }

  // gfx10
  if (isaName == "AMD:AMDGPU:10:1:0") { return gfx1010; }
  if (isaName == "AMD:AMDGPU:10:1:1") { return gfx1011; }
  if (isaName == "AMD:AMDGPU:10:1:2") { return gfx1012; }
  if (isaName == "AMD:AMDGPU:10:3:0") { return gfx1030; }
  if (isaName == "AMD:AMDGPU:10:3:1") { return gfx1031; }
  if (isaName == "AMD:AMDGPU:10:3:2") { return gfx1032; }
  if (isaName == "AMD:AMDGPU:10:3:3") { return gfx1033; }

  // The offline loader only supports code object v2, which only supports
  // asics up to gfx906. Do NOT add handling of new asics to the checks above.
  // NOTE(review): entries newer than gfx906 are already present above --
  // confirm whether this restriction still applies.
  assert(0);
  return invalid;
}
// Reports whether `agent` can execute `isa`. This context accepts every
// combination: neither argument is inspected and the answer is always true.
bool OfflineLoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
{
  static_cast<void>(agent);
  static_cast<void>(isa);
  return true;
}
// Allocates host memory for a segment and logs the request.
//   segment - segment kind, only used in the log line.
//   agent   - not used by this implementation.
//   size    - number of bytes to allocate.
//   align   - requested alignment, forwarded to alignedMalloc.
//   zero    - when true, the new memory is filled with zero bytes.
// Returns the allocated pointer, or a null pointer if the allocation failed.
// The returned pointer is recorded in `pointers`.
void* OfflineLoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero)
{
  void* ptr = alignedMalloc(size, align);
  // alignedMalloc yields a null pointer on failure; memset on a null pointer
  // is undefined behaviour, so only clear memory that was actually obtained.
  if (zero && ptr != NULL) { memset(ptr, 0, size); }
  out << "SegmentAlloc: " << segment << ": " << "size=" << size << " align=" << align << " zero=" << zero << " result=" << ptr << std::endl;
  pointers.insert(ptr);
  return ptr;
}
// Copies `size` bytes from `src` to `dst + offset` after logging the request.
// Returns false when either pointer is null or when `dst` and `src` are the
// same pointer; returns true otherwise, including for a zero-length copy
// (in which case nothing is copied).
bool OfflineLoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size)
{
  out << "SegmentCopy: " << segment << ": " << "dst=" << dst << " offset=" << offset << " src=" << src << " size=" << size << std::endl;

  const bool usable = (dst != NULL) && (src != NULL) && (dst != src);
  if (!usable) {
    return false;
  }

  if (size != 0) {
    char* const target = static_cast<char*>(dst) + offset;
    memcpy(target, src, size);
  }
  return true;
}
// Logs the request, drops `seg` from the `pointers` set and releases it with
// alignedFree. `segment` and `size` only appear in the log line; `agent` is
// not used.
void OfflineLoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
  out << "SegmentFree: " << segment << ": "
      << " ptr=" << seg
      << " size=" << size
      << std::endl;

  pointers.erase(seg);
  alignedFree(seg);
}
// Logs the request and returns the address `offset` bytes past `seg`.
// `segment` only appears in the log line; `agent` is not used.
void* OfflineLoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
  out << "SegmentAddress: " << segment << ": "
      << " ptr=" << seg
      << " offset=" << offset
      << std::endl;

  char* const base = static_cast<char*>(seg);
  return base + offset;
}
// Logs the request and returns the address `offset` bytes past `seg`; in this
// context the computation is the same byte-offset arithmetic used by
// SegmentAddress. `segment` only appears in the log line; `agent` is not used.
void* OfflineLoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
  out << "SegmentHostAddress: " << segment << ": "
      << " ptr=" << seg
      << " offset=" << offset
      << std::endl;

  char* const base = static_cast<char*>(seg);
  return base + offset;
}
// Logs the freeze request and reports success. Nothing besides the log line
// is done with the segment here; `agent` is not used.
bool OfflineLoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
  out << "SegmentFreeze: " << segment << ": "
      << " ptr=" << seg
      << " size=" << size
      << std::endl;

  static_cast<void>(agent);
  return true;
}
// Reports whether image operations are available through this context.
// The offline context always answers true.
bool OfflineLoaderContext::ImageExtensionSupported()
{
return true;
}
// Creates a placeholder image: a 256-byte, 8-byte-aligned host allocation
// whose address becomes the image handle. The descriptor fields and the
// `image_data` pointer are only written to the log; `agent` is not used.
//
// Returns HSA_STATUS_SUCCESS when the allocation succeeded (the pointer is
// then recorded in `pointers`), or HSA_STATUS_ERROR_OUT_OF_RESOURCES when it
// failed. In the failure case the handle is set to 0 and nothing is tracked.
hsa_status_t OfflineLoaderContext::ImageCreate(
    hsa_agent_t agent,
    hsa_access_permission_t image_permission,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_ext_image_t *image_handle)
{
  void* ptr = alignedMalloc(256, 8);

  out << "ImageCreate" << ":" <<
    " permission=" << image_permission <<
    " geometry=" << image_descriptor->geometry <<
    " width=" << image_descriptor->width <<
    " height=" << image_descriptor->height <<
    " depth=" << image_descriptor->depth <<
    " array_size=" << image_descriptor->array_size <<
    " channel_type=" << image_descriptor->format.channel_type <<
    " channel_order=" << image_descriptor->format.channel_order<<
    " data=" << image_data <<
    std::endl;

  // The handle is written in both outcomes so a failed call leaves it at 0.
  image_handle->handle = reinterpret_cast<uint64_t>(ptr);

  // Do not report success (or track a null entry) when the allocation failed.
  if (ptr == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  pointers.insert(ptr);
  return HSA_STATUS_SUCCESS;
}
// Destroys an image made by ImageCreate: the handle is converted back to the
// pointer it was built from, removed from `pointers` and released with
// alignedFree. Always returns HSA_STATUS_SUCCESS; `agent` is not used.
hsa_status_t OfflineLoaderContext::ImageDestroy(
    hsa_agent_t agent, hsa_ext_image_t image_handle)
{
  void* const backing = reinterpret_cast<void*>(image_handle.handle);

  pointers.erase(backing);
  alignedFree(backing);

  return HSA_STATUS_SUCCESS;
}
// Creates a placeholder sampler: a 256-byte, 8-byte-aligned host allocation
// whose address becomes the sampler handle. The descriptor fields are only
// written to the log; `agent` is not used.
//
// Returns HSA_STATUS_SUCCESS when the allocation succeeded (the pointer is
// then recorded in `pointers`), or HSA_STATUS_ERROR_OUT_OF_RESOURCES when it
// failed. In the failure case the handle is set to 0 and nothing is tracked.
hsa_status_t OfflineLoaderContext::SamplerCreate(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler_handle)
{
  void* ptr = alignedMalloc(256, 8);

  out << "SamplerCreate" << ":" <<
    " coordinate_mode=" << sampler_descriptor->coordinate_mode <<
    " filter_mode=" << sampler_descriptor->filter_mode <<
    " address_mode=" << sampler_descriptor->address_mode <<
    std::endl;

  // The handle is written in both outcomes so a failed call leaves it at 0.
  sampler_handle->handle = reinterpret_cast<uint64_t>(ptr);

  // Do not report success (or track a null entry) when the allocation failed.
  if (ptr == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  pointers.insert(ptr);
  return HSA_STATUS_SUCCESS;
}
// Destroys a sampler made by SamplerCreate: the handle is converted back to
// the pointer it was built from, removed from `pointers` and released with
// alignedFree. Always returns HSA_STATUS_SUCCESS; `agent` is not used.
hsa_status_t OfflineLoaderContext::SamplerDestroy(
    hsa_agent_t agent, hsa_ext_sampler_t sampler_handle)
{
  void* const backing = reinterpret_cast<void*>(sampler_handle.handle);

  pointers.erase(backing);
  alignedFree(backing);

  return HSA_STATUS_SUCCESS;
}
} // namespace loader
} // namespace hsa
} // namespace amd
} // namespace rocr