diff --git a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index e593f2d4b0..5cc20378b7 100644 --- a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -865,7 +865,7 @@ hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time( hsa_agent_t agent, hsa_signal_t signal, hsa_amd_profiling_dispatch_time_t* time) { return amdExtTable->hsa_amd_profiling_get_dispatch_time_fn( - agent, signal, time); + agent, signal, time); } hsa_status_t HSA_API @@ -1017,16 +1017,16 @@ hsa_status_t HSA_API } // Mirrors Amd Extension Apis -hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, - hsa_agent_t* agents, - int interop_handle, - uint32_t flags, - size_t* size, - void** ptr, - size_t* metadata_size, +hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, + hsa_agent_t* agents, + int interop_handle, + uint32_t flags, + size_t* size, + void** ptr, + size_t* metadata_size, const void** metadata) { return amdExtTable->hsa_amd_interop_map_buffer_fn( - num_agents, agents, interop_handle, + num_agents, agents, interop_handle, flags, size, ptr, metadata_size, metadata); } @@ -1094,3 +1094,11 @@ hsa_status_t HSA_API hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handl hsa_signal_t* signal) { return amdExtTable->hsa_amd_ipc_signal_attach_fn(handle, signal); } + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_register_system_event_handler( + hsa_amd_event_t type, + hsa_status_t (*callback)(const void* event_specific_data, void* data), + void* data) { + return amdExtTable->hsa_amd_register_system_event_handler_fn(type, callback, data); +} diff --git a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp b/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp index a2c9dc6fbc..4b90f0e2c0 100644 --- a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp +++ b/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp @@ -208,6 +208,15 @@ public: void *data), void *data) = 0; + virtual hsa_agent_t getAgent() const = 0; + virtual hsa_executable_t getExecutable() const = 0; + virtual uint64_t getElfData() const = 0; + virtual uint64_t getElfSize() const = 0; + virtual uint64_t getStorageOffset() const = 0; 
+ virtual uint64_t getLoadBase() const = 0; + virtual uint64_t getLoadSize() const = 0; + virtual int64_t getDelta() const = 0; + protected: LoadedCodeObject() {} @@ -329,6 +338,7 @@ public: virtual hsa_status_t IterateLoadedCodeObjects( hsa_status_t (*callback)( + hsa_executable_t executable, hsa_loaded_code_object_t loaded_code_object, void *data), void *data) = 0; diff --git a/runtime/hsa-runtime/core/inc/host_queue.h b/runtime/hsa-runtime/core/inc/host_queue.h index 90ef35120a..9260bcff5c 100644 --- a/runtime/hsa-runtime/core/inc/host_queue.h +++ b/runtime/hsa-runtime/core/inc/host_queue.h @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -164,6 +164,10 @@ class HostQueue : public Queue { bool active_; void* ring_; + // Host queue id counter, starting from 0x80000000 to avoid overlapping + // with aql queue id. + static volatile uint32_t queue_count_; + DISALLOW_COPY_AND_ASSIGN(HostQueue); }; } // namespace core diff --git a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 94024a9206..a8066710af 100644 --- a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -174,13 +174,13 @@ hsa_status_t HSA_API hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count); // Mirrors Amd Extension Apis -hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, - hsa_agent_t* agents, - int interop_handle, - uint32_t flags, - size_t* size, - void** ptr, - size_t* metadata_size, +hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, + hsa_agent_t* agents, + int interop_handle, + uint32_t flags, + size_t* size, + void** ptr, + size_t* metadata_size, const void** metadata); // Mirrors Amd Extension Apis @@ -210,6 +210,12 @@ hsa_status_t hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_signal_t // Mirrors Amd Extension Apis hsa_status_t hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle, hsa_signal_t* signal); +// Mirrors Amd Extension Apis +hsa_status_t hsa_amd_register_system_event_handler( + hsa_amd_event_t type, + hsa_status_t (*callback)(const void* event_specific_data, void* data), + void* data); + } // end of AMD namespace #endif // header guard diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index 1a18149db3..4bd4d2ad52 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -317,6 +317,8 @@ class Runtime { ExtensionEntryPoints extensions_; + hsa_status_t SetCustomVMFaultHandler(hsa_status_t (*callback)(const void* event_specific_data, void* data), 
void* data); + protected: static void AsyncEventsLoop(void*); @@ -479,9 +481,6 @@ class Runtime { AsyncEvents new_async_events_; - // Queue id counter. - uint32_t queue_count_; - // Starting address of SVM address space. // On APU the cpu and gpu could access the area inside starting and end of // the SVM address space. @@ -502,6 +501,11 @@ class Runtime { // @brief HSA signal to contain the VM fault event. Signal* vm_fault_signal_; + // custom VM fault handler. + hsa_status_t (*vm_fault_handler_custom_)(const void* event_specific_data, + void* data); + void* vm_fault_handler_user_data_; + // Holds reference count to runtime object. volatile uint32_t ref_count_; diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 787243a747..c1f295eb33 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -171,7 +171,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, Scr amd_queue_.hsa_queue.base_address = ring_buf_; amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this); amd_queue_.hsa_queue.size = queue_size_pkts; - amd_queue_.hsa_queue.id = core::Runtime::runtime_singleton_->GetQueueId(); + amd_queue_.hsa_queue.id = queue_id_; amd_queue_.read_dispatch_id_field_base_byte_offset = uint32_t( uintptr_t(&amd_queue_.read_dispatch_id) - uintptr_t(&amd_queue_)); @@ -893,21 +893,21 @@ void AqlQueue::InitScratchSRD() { SQ_BUF_RSRC_WORD1 srd1; SQ_BUF_RSRC_WORD2 srd2; SQ_BUF_RSRC_WORD3 srd3; - + uint32_t scratch_base_hi = 0; uintptr_t scratch_base = uintptr_t(queue_scratch_.queue_base); #ifdef HSA_LARGE_MODEL scratch_base_hi = uint32_t(scratch_base >> 32); #endif srd0.bits.BASE_ADDRESS = uint32_t(scratch_base); - + srd1.bits.BASE_ADDRESS_HI = scratch_base_hi; srd1.bits.STRIDE = 0; srd1.bits.CACHE_SWIZZLE = 0; srd1.bits.SWIZZLE_ENABLE = 1; - + srd2.bits.NUM_RECORDS = uint32_t(queue_scratch_.size); - + 
srd3.bits.DST_SEL_X = SQ_SEL_X; srd3.bits.DST_SEL_Y = SQ_SEL_Y; srd3.bits.DST_SEL_Z = SQ_SEL_Z; @@ -922,8 +922,8 @@ void AqlQueue::InitScratchSRD() { srd3.bits.HEAP = 0; srd3.bits.MTYPE__CI__VI = 0; srd3.bits.TYPE = SQ_RSRC_BUF; - - // Update Queue's Scratch descriptor's property + + // Update Queue's Scratch descriptor's property amd_queue_.scratch_resource_descriptor[0] = srd0.u32All; amd_queue_.scratch_resource_descriptor[1] = srd1.u32All; amd_queue_.scratch_resource_descriptor[2] = srd2.u32All; @@ -949,7 +949,7 @@ void AqlQueue::InitScratchSRD() { uint32_t max_scratch_waves = num_cus * agent_props.MaxSlotsScratchCU; // Scratch is allocated program COMPUTE_TMPRING_SIZE register - // Scratch Size per Wave is specified in terms of kilobytes + // Scratch Size per Wave is specified in terms of kilobytes uint32_t wave_size = agent_props.WaveFrontSize; tmpring_size.bits.WAVESIZE = (((wave_size * queue_scratch_.size_per_thread) + 1023) / 1024); diff --git a/runtime/hsa-runtime/core/runtime/host_queue.cpp b/runtime/hsa-runtime/core/runtime/host_queue.cpp index 5d3d1b2501..09d579545e 100644 --- a/runtime/hsa-runtime/core/runtime/host_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/host_queue.cpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. 
-// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -46,6 +46,9 @@ #include "core/util/utils.h" namespace core { + +volatile uint32_t HostQueue::queue_count_ = 0x80000000; + HostQueue::HostQueue(hsa_region_t region, uint32_t ring_size, hsa_queue_type32_t type, uint32_t features, hsa_signal_t doorbell_signal) @@ -70,7 +73,7 @@ HostQueue::HostQueue(hsa_region_t region, uint32_t ring_size, amd_queue_.hsa_queue.base_address = ring_; amd_queue_.hsa_queue.size = size_; amd_queue_.hsa_queue.doorbell_signal = doorbell_signal; - amd_queue_.hsa_queue.id = Runtime::runtime_singleton_->GetQueueId(); + amd_queue_.hsa_queue.id = atomic::Increment(&queue_count_); amd_queue_.hsa_queue.type = type; amd_queue_.hsa_queue.features = features; #ifdef HSA_LARGE_MODEL diff --git a/runtime/hsa-runtime/core/runtime/hsa.cpp b/runtime/hsa-runtime/core/runtime/hsa.cpp index 827cd4de3f..bf953d10b8 100644 --- a/runtime/hsa-runtime/core/runtime/hsa.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa.cpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. 
// - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -219,7 +219,7 @@ hsa_status_t hsa_shut_down() { //---------------------------------------------------------------------------// // System //---------------------------------------------------------------------------// -hsa_status_t +hsa_status_t hsa_system_get_info(hsa_system_info_t attribute, void* value) { TRY; IS_OPEN(); @@ -262,7 +262,7 @@ hsa_status_t hsa_extension_get_name(uint16_t extension, const char** name) { CATCH; } -hsa_status_t +hsa_status_t hsa_system_extension_supported(uint16_t extension, uint16_t version_major, uint16_t version_minor, bool* result) { TRY; @@ -342,6 +342,7 @@ static size_t get_extension_table_length(uint16_t extension, uint16_t major, uin {"hsa_ext_images_1_00_pfn_t", sizeof(hsa_ext_images_1_00_pfn_t)}, {"hsa_ext_finalizer_1_00_pfn_t", sizeof(hsa_ext_finalizer_1_00_pfn_t)}, {"hsa_ven_amd_loader_1_00_pfn_t", sizeof(hsa_ven_amd_loader_1_00_pfn_t)}, + {"hsa_ven_amd_loader_1_01_pfn_t", sizeof(hsa_ven_amd_loader_1_01_pfn_t)}, {"hsa_ven_amd_aqlprofile_1_00_pfn_t", sizeof(hsa_ven_amd_aqlprofile_1_00_pfn_t)}}; static const size_t num_tables = sizeof(sizes) / sizeof(sizes_t); @@ -448,12 +449,16 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v } if (extension == HSA_EXTENSION_AMD_LOADER) { - if (version_major > 1) return HSA_STATUS_ERROR; - hsa_ven_amd_loader_1_00_pfn_t ext_table; + if (version_major != 1) return HSA_STATUS_ERROR; + hsa_ven_amd_loader_1_01_pfn_t ext_table; ext_table.hsa_ven_amd_loader_query_host_address = 
hsa_ven_amd_loader_query_host_address; ext_table.hsa_ven_amd_loader_query_segment_descriptors = hsa_ven_amd_loader_query_segment_descriptors; ext_table.hsa_ven_amd_loader_query_executable = hsa_ven_amd_loader_query_executable; + ext_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects = + hsa_ven_amd_loader_executable_iterate_loaded_code_objects; + ext_table.hsa_ven_amd_loader_loaded_code_object_get_info = + hsa_ven_amd_loader_loaded_code_object_get_info; memcpy(table, &ext_table, Min(sizeof(ext_table), table_length)); @@ -487,7 +492,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v //---------------------------------------------------------------------------// // Agent //---------------------------------------------------------------------------// -hsa_status_t +hsa_status_t hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void* data), void* data) { TRY; @@ -546,7 +551,7 @@ hsa_status_t hsa_agent_iterate_caches(hsa_agent_t agent_handle, CATCH; } -hsa_status_t +hsa_status_t hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent_handle, uint16_t version_major, uint16_t version_minor, bool* result) { @@ -1036,7 +1041,7 @@ hsa_status_t hsa_memory_deregister(void* address, size_t size) { CATCH; } -hsa_status_t +hsa_status_t hsa_memory_allocate(hsa_region_t region, size_t size, void** ptr) { TRY; IS_OPEN(); @@ -1103,7 +1108,7 @@ hsa_status_t hsa_memory_copy(void* dst, const void* src, size_t size) { // Signals //----------------------------------------------------------------------------- -hsa_status_t +hsa_status_t hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers, const hsa_agent_t* consumers, hsa_signal_t* hsa_signal) { return AMD::hsa_amd_signal_create(initial_value, num_consumers, consumers, 0, hsa_signal); @@ -1151,7 +1156,7 @@ void hsa_signal_store_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t valu CATCHRET(void); } -hsa_signal_value_t +hsa_signal_value_t 
hsa_signal_wait_relaxed(hsa_signal_t hsa_signal, hsa_signal_condition_t condition, hsa_signal_value_t compare_value, @@ -1400,7 +1405,7 @@ void hsa_signal_subtract_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t CATCHRET(void); } -hsa_signal_value_t +hsa_signal_value_t hsa_signal_exchange_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) { TRY; @@ -1829,13 +1834,13 @@ hsa_status_t hsa_code_object_get_info( if (status != HSA_STATUS_SUCCESS) { return status; } - + hsa_isa_t isa_handle = {0}; status = HSA::hsa_isa_from_name(isa_name, &isa_handle); if (status != HSA_STATUS_SUCCESS) { return status; } - + *((hsa_isa_t*)value) = isa_handle; return HSA_STATUS_SUCCESS; } @@ -2541,6 +2546,9 @@ hsa_status_t hsa_status_string( case HSA_STATUS_ERROR_INVALID_ISA: *status_string = "HSA_STATUS_ERROR_INVALID_ISA: The instruction set architecture is invalid."; break; + case HSA_STATUS_ERROR_INVALID_ISA_NAME: + *status_string = "HSA_STATUS_ERROR_INVALID_ISA_NAME: The instruction set architecture name is invalid."; + break; case HSA_STATUS_ERROR_INVALID_CODE_OBJECT: *status_string = "HSA_STATUS_ERROR_INVALID_CODE_OBJECT: The code object is invalid."; break; diff --git a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index 75814a8d16..7d1e3ccbf2 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. 
-// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -73,7 +73,7 @@ void HsaApiTable::Init() { // Update Api table for Amd Extensions and its major id UpdateAmdExts(); hsa_api.amd_ext_ = &amd_ext_api; - + // Initialize Api tables for Finalizer, Image, AqlProfile to NULL // The tables are initialized as part // of Hsa Runtime initialization, including their major ids @@ -87,7 +87,7 @@ void HsaApiTable::Reset() { } void HsaApiTable::CloneExts(void* ext_table, uint32_t table_id) { - + assert(ext_table != NULL && "Invalid extension table linked."); // Update HSA Extension Finalizer Api table @@ -113,20 +113,20 @@ void HsaApiTable::CloneExts(void* ext_table, uint32_t table_id) { } void HsaApiTable::LinkExts(void* ext_table, uint32_t table_id) { - + assert(ext_table != NULL && "Invalid extension table linked."); // Update HSA Extension Finalizer Api table if (table_id == HSA_EXT_FINALIZER_API_TABLE_ID) { finalizer_api = (*(FinalizerExtTable *)ext_table); - hsa_api.finalizer_ext_ = (FinalizerExtTable *)ext_table; + hsa_api.finalizer_ext_ = (FinalizerExtTable *)ext_table; return; } // Update HSA Extension Image Api table if (table_id == HSA_EXT_IMAGE_API_TABLE_ID) { image_api = (*(ImageExtTable *)ext_table); - hsa_api.image_ext_ = (ImageExtTable *)ext_table; + hsa_api.image_ext_ = (ImageExtTable *)ext_table; return; } @@ -140,7 +140,7 @@ void HsaApiTable::LinkExts(void* ext_table, uint32_t table_id) { // Update Api table for Hsa Core Runtime void HsaApiTable::UpdateCore() { - + // Initialize Version of Api Table core_api.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION; core_api.version.minor_id = sizeof(::CoreApiTable); @@ -341,12 +341,12 @@ void HsaApiTable::UpdateCore() { // member variable hsa_amd_image_create_fn while loading // Image extension library void HsaApiTable::UpdateAmdExts() { - + // Initialize Version of Api Table amd_ext_api.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION; amd_ext_api.version.minor_id = sizeof(::AmdExtTable); amd_ext_api.version.step_id = 
HSA_AMD_EXT_API_TABLE_STEP_VERSION; - + // Initialize function pointers for Amd Extension Api's amd_ext_api.hsa_amd_coherency_get_type_fn = AMD::hsa_amd_coherency_get_type; amd_ext_api.hsa_amd_coherency_set_type_fn = AMD::hsa_amd_coherency_set_type; @@ -381,6 +381,7 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_signal_create_fn = AMD::hsa_amd_signal_create; amd_ext_api.hsa_amd_ipc_signal_create_fn = AMD::hsa_amd_ipc_signal_create; amd_ext_api.hsa_amd_ipc_signal_attach_fn = AMD::hsa_amd_ipc_signal_attach; + amd_ext_api.hsa_amd_register_system_event_handler_fn = AMD::hsa_amd_register_system_event_handler; } class Init { diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 8f4309fbf9..99da16607e 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -798,4 +798,19 @@ hsa_status_t hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle, CATCH; } +hsa_status_t hsa_amd_register_system_event_handler( + hsa_amd_event_t type, + hsa_status_t (*callback)(const void* event_specific_data, void* data), + void* data) { + TRY; + IS_OPEN(); + switch (type) { + case GPU_MEMORY_FAULT_EVENT: + return core::Runtime::runtime_singleton_->SetCustomVMFaultHandler(callback, data); + default: + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + CATCH; +} + } // end of AMD namespace diff --git a/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp b/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp index c95b62d141..6b4cb774d2 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp @@ -45,9 +45,13 @@ #include "core/inc/amd_hsa_loader.hpp" #include "core/inc/runtime.h" +using namespace amd::hsa; using namespace core; -hsa_status_t HSA_API hsa_ven_amd_loader_query_host_address( +using loader::Executable; +using loader::LoadedCodeObject; + +hsa_status_t 
hsa_ven_amd_loader_query_host_address( const void *device_address, const void **host_address) { if (false == core::Runtime::runtime_singleton_->IsOpen()) { @@ -70,7 +74,7 @@ hsa_status_t HSA_API hsa_ven_amd_loader_query_host_address( return HSA_STATUS_SUCCESS; } -hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors( +hsa_status_t hsa_ven_amd_loader_query_segment_descriptors( hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, size_t *num_segment_descriptors) { if (false == core::Runtime::runtime_singleton_->IsOpen()) { @@ -81,7 +85,7 @@ hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors( return Runtime::runtime_singleton_->loader()->QuerySegmentDescriptors(segment_descriptors, num_segment_descriptors); } -hsa_status_t HSA_API hsa_ven_amd_loader_query_executable( +hsa_status_t hsa_ven_amd_loader_query_executable( const void *device_address, hsa_executable_t *executable) { @@ -101,3 +105,110 @@ hsa_status_t HSA_API hsa_ven_amd_loader_query_executable( *executable = exec; return HSA_STATUS_SUCCESS; } + +hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + hsa_executable_t executable, + hsa_status_t (*callback)( + hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, + void *data), + void *data) { + if (false == core::Runtime::runtime_singleton_->IsOpen()) { + return HSA_STATUS_ERROR_NOT_INITIALIZED; + } + if (nullptr == callback) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + Executable *exec = Executable::Object(executable); + if (!exec) { + return HSA_STATUS_ERROR_INVALID_EXECUTABLE; + } + + return exec->IterateLoadedCodeObjects(callback, data); +} + +hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info( + hsa_loaded_code_object_t loaded_code_object, + hsa_ven_amd_loader_loaded_code_object_info_t attribute, + void *value) { + if (false == core::Runtime::runtime_singleton_->IsOpen()) { + return HSA_STATUS_ERROR_NOT_INITIALIZED; + } + if (nullptr == value) { + return 
HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + const LoadedCodeObject *lcobj = LoadedCodeObject::Object(loaded_code_object); + if (!lcobj) { + return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; + } + + switch (attribute) { + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE: { + *((hsa_executable_t*)value) = lcobj->getExecutable(); + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND: { + *((uint32_t*)value) = lcobj->getAgent().handle == 0 + ? HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM + : HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT; + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT: { + hsa_agent_t agent = lcobj->getAgent(); + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + *((hsa_agent_t*)value) = agent; + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE: { + // TODO Update loader so it keeps track if code object was loaded from a + // file or memory. + *((uint32_t*)value) = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY; + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE: { + *((uint64_t*)value) = lcobj->getElfData(); + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE: { + *((uint64_t*)value) = lcobj->getElfSize(); + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE: { + // TODO Update loader so it keeps track if code object was loaded from a + // file or memory. + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA: { + // TODO Check if executable is frozen. + // This suggests this code should be moved into LoadedCodeObjectImpl::getinfo + // as is done for other *_get_info methods. Currently LoadedCodeObject has a + // GetInfo method which is likely not used. + // Also should this have a *NOT_FROZEN status code added? 
+ // if (state_ != HSA_EXECUTABLE_STATE_FROZEN) { + // return HSA_STATUS_ERROR_INVALID_ARGUMENT; + // } + *((int64_t*)value) = lcobj->getDelta(); + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE: { + // TODO Check if executable is frozen. + *((uint64_t*)value) = lcobj->getLoadBase(); + break; + } + case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE: { + // TODO Check if executable is frozen. + *((uint64_t*)value) = lcobj->getLoadSize(); + break; + } + default: { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + } + + return HSA_STATUS_SUCCESS; +} diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 04ac06c7db..59bb32be23 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -589,8 +589,6 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) { return HSA_STATUS_SUCCESS; } -uint32_t Runtime::GetQueueId() { return atomic::Increment(&queue_count_); } - hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal, hsa_signal_condition_t cond, hsa_signal_value_t value, @@ -1044,48 +1042,86 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) { return false; } - if (runtime_singleton_->flag().enable_vm_fault_message()) { - HsaEvent* vm_fault_event = vm_fault_signal->EopEvent(); + HsaEvent* vm_fault_event = vm_fault_signal->EopEvent(); - const HsaMemoryAccessFault& fault = - vm_fault_event->EventData.EventData.MemoryAccessFault; + HsaMemoryAccessFault& fault = + vm_fault_event->EventData.EventData.MemoryAccessFault; - std::string reason = ""; + hsa_status_t custom_handler_status = HSA_STATUS_ERROR; + // If custom handler is registered, pack the fault info and call the handler + if (runtime_singleton_->vm_fault_handler_custom_ != nullptr) { + hsa_amd_gpu_memory_fault_info_t* fault_info = new hsa_amd_gpu_memory_fault_info_t; + + // Find the faulty agent + auto it = 
runtime_singleton_->agents_by_node_.find(fault.NodeId); + assert(it != runtime_singleton_->agents_by_node_.end() && "Can't find faulty agent."); + Agent* faulty_agent = it->second.front(); + fault_info->agent = Agent::Convert(faulty_agent); + + fault_info->virtual_address = fault.VirtualAddress; + fault_info->fault_reason_mask = 0x00000000; if (fault.Failure.NotPresent == 1) { - reason += "Page not present or supervisor privilege"; - } else if (fault.Failure.ReadOnly == 1) { - reason += "Write access to a read-only page"; - } else if (fault.Failure.NoExecute == 1) { - reason += "Execute access to a page marked NX"; - } else if (fault.Failure.GpuAccess == 1) { - reason += "Host access only"; - } else if (fault.Failure.ECC == 1) { - reason += "ECC failure (if supported by HW)"; - } else { - reason += "Unknown"; + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00000001; + } + if (fault.Failure.ReadOnly == 1) { + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00000010; + } + if (fault.Failure.NoExecute == 1) { + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00000100; + } + if (fault.Failure.GpuAccess == 1) { + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00001000; + } + if (fault.Failure.ECC == 1) { + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00010000; + } + if (fault.Failure.Imprecise == 1) { + fault_info->fault_reason_mask = fault_info->fault_reason_mask | 0x00100000; } - fprintf(stderr, - "Memory access fault by GPU node-%u on address %p%s. Reason: %s.\n", - fault.NodeId, reinterpret_cast(fault.VirtualAddress), - (fault.Failure.Imprecise == 1) ? "(may not be exact address)" : "", - reason.c_str()); - } else { - assert(false && "GPU memory access fault."); + custom_handler_status = runtime_singleton_->vm_fault_handler_custom_(fault_info, + runtime_singleton_->vm_fault_handler_user_data_); } - std::abort(); + // No custom VM fault handler registered or it failed. 
+ if (custom_handler_status != HSA_STATUS_SUCCESS) { + if (runtime_singleton_->flag().enable_vm_fault_message()) { + std::string reason = ""; + if (fault.Failure.NotPresent == 1) { + reason += "Page not present or supervisor privilege"; + } else if (fault.Failure.ReadOnly == 1) { + reason += "Write access to a read-only page"; + } else if (fault.Failure.NoExecute == 1) { + reason += "Execute access to a page marked NX"; + } else if (fault.Failure.GpuAccess == 1) { + reason += "Host access only"; + } else if (fault.Failure.ECC == 1) { + reason += "ECC failure (if supported by HW)"; + } else { + reason += "Unknown"; + } + fprintf(stderr, + "Memory access fault by GPU node-%u on address %p%s. Reason: %s.\n", + fault.NodeId, reinterpret_cast(fault.VirtualAddress), + (fault.Failure.Imprecise == 1) ? "(may not be exact address)" : "", + reason.c_str()); + } else { + assert(false && "GPU memory access fault."); + } + + std::abort(); + } // No need to keep the signal because we are done. return false; } Runtime::Runtime() : blit_agent_(NULL), - queue_count_(0), sys_clock_freq_(0), vm_fault_event_(nullptr), vm_fault_signal_(nullptr), + vm_fault_handler_custom_(nullptr), ref_count_(0) { start_svm_address_ = 0; #if defined(HSA_LARGE_MODEL) @@ -1363,4 +1399,15 @@ void Runtime::AsyncEvents::Clear() { arg_.clear(); } +hsa_status_t Runtime::SetCustomVMFaultHandler( + hsa_status_t (*callback)(const void* event_specific_data, void* data), + void* data) { + if (vm_fault_handler_custom_ != nullptr) { + return HSA_STATUS_ERROR; + } else { + vm_fault_handler_custom_ = callback; + vm_fault_handler_user_data_ = data; + return HSA_STATUS_SUCCESS; + } +} } // namespace core diff --git a/runtime/hsa-runtime/hsacore.so.def b/runtime/hsa-runtime/hsacore.so.def index e953304d64..9299b8de4b 100644 --- a/runtime/hsa-runtime/hsacore.so.def +++ b/runtime/hsa-runtime/hsacore.so.def @@ -204,9 +204,9 @@ global: hsa_ext_image_destroy; hsa_ext_sampler_create; hsa_ext_sampler_destroy; - 
hsa_ext_image_get_capability_with_layout; - hsa_ext_image_data_get_info_with_layout; - hsa_ext_image_create_with_layout; + hsa_ext_image_get_capability_with_layout; + hsa_ext_image_data_get_info_with_layout; + hsa_ext_image_create_with_layout; hsa_amd_pointer_info; hsa_amd_pointer_info_set_userdata; hsa_amd_ipc_memory_create; @@ -214,6 +214,7 @@ global: hsa_amd_ipc_memory_detach; hsa_amd_ipc_signal_create; hsa_amd_ipc_signal_attach; + hsa_amd_register_system_event_handler; local: *; diff --git a/runtime/hsa-runtime/inc/amd_hsa_queue.h b/runtime/hsa-runtime/inc/amd_hsa_queue.h index 60e4c079cc..b37bb53f36 100644 --- a/runtime/hsa-runtime/inc/amd_hsa_queue.h +++ b/runtime/hsa-runtime/inc/amd_hsa_queue.h @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL diff --git a/runtime/hsa-runtime/inc/hsa_api_trace.h b/runtime/hsa-runtime/inc/hsa_api_trace.h index a1927198d2..455e3399ec 100644 --- a/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -2,24 +2,24 @@ // // The University of Illinois/NCSA // Open Source License (NCSA) -// +// // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. -// +// // Developed by: -// +// // AMD Research and AMD HSA Software Development -// +// // Advanced Micro Devices, Inc. -// +// // www.amd.com -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: -// +// // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright @@ -29,7 +29,7 @@ // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -91,7 +91,7 @@ struct ApiTableVersion { uint32_t reserved; }; -// Table to export HSA Finalizer Extension Apis +// Table to export HSA Finalizer Extension Apis struct FinalizerExtTable { ApiTableVersion version; decltype(hsa_ext_program_create)* hsa_ext_program_create_fn; @@ -169,6 +169,7 @@ struct AmdExtTable { decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn; decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn; decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn; + decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn; }; // Table to export HSA Core Runtime Apis @@ -355,7 +356,7 @@ struct HsaApiTable { // Version of Hsa Api Table ApiTableVersion version; - + // Table of function pointers to HSA Core Runtime CoreApiTable* core_; @@ -364,7 +365,7 @@ struct HsaApiTable { // Table of function pointers to HSA Finalizer Extension FinalizerExtTable* finalizer_ext_; - + // Table of function pointers to HSA Image Extension ImageExtTable* image_ext_; @@ -386,12 +387,12 @@ struct HsaApiTableContainer { root.version.major_id = HSA_API_TABLE_MAJOR_VERSION; root.version.minor_id = sizeof(HsaApiTable); root.version.step_id = HSA_API_TABLE_STEP_VERSION; - + core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION; core.version.minor_id = sizeof(CoreApiTable); core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION; root.core_ = &core; - + amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION; amd_ext.version.minor_id = sizeof(AmdExtTable); amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION; diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h index 28031b30f2..78db49608f 100755 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -711,17 +711,17 @@ typedef enum { /** * @brief Get the current value of an attribute of a memory pool. - * + * * @param[in] memory_pool A valid memory pool. 
- * + * * @param[in] attribute Attribute to query. - * + * * @param[out] value Pointer to a application-allocated buffer where to store * the value of the attribute. If the buffer passed by the application is not * large enough to hold the value of @p attribute, the behavior is undefined. - * + * * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. - * + * */ hsa_status_t HSA_API hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool, @@ -741,7 +741,7 @@ hsa_status_t HSA_API * * @param[in] agent A valid agent. * - * @param[in] callback Callback to be invoked on the same thread that called + * @param[in] callback Callback to be invoked on the same thread that called * ::hsa_amd_agent_iterate_memory_pools, serially, once per memory pool that is * associated with the agent. The HSA runtime passes two arguments to the * callback: the memory pool, and the application data. If @p callback @@ -1265,7 +1265,7 @@ hsa_status_t HSA_API * @param[out] metadata_size Size of metadata in bytes, may be NULL * * @param[out] metadata Pointer to metadata, may be NULL - * + * * @retval HSA_STATUS_SUCCESS if successfully mapped * * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized @@ -1275,13 +1275,13 @@ hsa_status_t HSA_API * * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT all other errors */ -hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, - hsa_agent_t* agents, - int interop_handle, - uint32_t flags, - size_t* size, - void** ptr, - size_t* metadata_size, +hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents, + hsa_agent_t* agents, + int interop_handle, + uint32_t flags, + size_t* size, + void** ptr, + size_t* metadata_size, const void** metadata); /** @@ -1300,7 +1300,7 @@ typedef struct hsa_amd_image_descriptor_s { Version number of the descriptor */ uint32_t version; - + /* Vendor and device PCI IDs for the format as VENDOR_ID<<16|DEVICE_ID. 
*/ @@ -1625,6 +1625,66 @@ hsa_status_t HSA_API hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_ hsa_status_t HSA_API hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle, hsa_signal_t* signal); +/** + * @brief GPU system event type. + */ +typedef enum hsa_amd_event_s { + /** + * AMD GPU memory fault. + */ + GPU_MEMORY_FAULT_EVENT = 0 +} hsa_amd_event_t; + +/** + * @brief AMD GPU memory fault event data (event_specific_data) type passed to event handler. + */ +typedef struct hsa_amd_gpu_memory_fault_info_s { + /** + * The agent where the memory fault occurred. + */ + hsa_agent_t agent; + /** + * Virtual address accessed. + */ + uint64_t virtual_address; + /** + * Bit field encoding the memory access failure reasons. There could be multiple bits set + * for one fault. + * 0x00000001 Page not present or supervisor privilege. + * 0x00000010 Write access to a read-only page. + * 0x00000100 Execute access to a page marked NX. + * 0x00001000 Host access only. + * 0x00010000 ECC failure (if supported by HW). + * 0x00100000 Can't determine the exact fault address. + */ + uint32_t fault_reason_mask; +} hsa_amd_gpu_memory_fault_info_t; + +/** + * @brief Register AMD GPU event handler. + * + * @param[in] type The GPU event type. + * + * @param[in] callback Callback to be invoked when the event is triggered. + * The HSA runtime passes two arguments to the callback: the event data and user data. + * Event data is defined per event by the HSA runtime. For GPU_MEMORY_FAULT_EVENT, + * the event data type is hsa_amd_gpu_memory_fault_info_t. + * + * @param[in] data User data that is passed to @p callback. May be NULL. + * + * @param[out] callback Function pointer to the handler. + * + * @retval ::HSA_STATUS_SUCCESS The handler has been registered successfully. + * + * @retval ::HSA_STATUS_ERROR A handler for the event has already been registered. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is invalid. 
+ */ +hsa_status_t hsa_amd_register_system_event_handler( + hsa_amd_event_t type, + hsa_status_t (*callback)(const void* event_specific_data, void* data), + void* data); + #ifdef __cplusplus } // end extern "C" block #endif diff --git a/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h b/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h index 020dd9173f..69a6b18a06 100644 --- a/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h +++ b/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h @@ -83,7 +83,7 @@ extern "C" { * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or * null, or @p host_address is null. */ -hsa_status_t HSA_API hsa_ven_amd_loader_query_host_address( +hsa_status_t hsa_ven_amd_loader_query_host_address( const void *device_address, const void **host_address); @@ -220,7 +220,7 @@ typedef struct hsa_ven_amd_loader_segment_descriptor_s { * does not point to number that exactly matches total number of loaded memory * segment descriptors. */ -hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors( +hsa_status_t hsa_ven_amd_loader_query_segment_descriptors( hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, size_t *num_segment_descriptors); @@ -242,13 +242,169 @@ hsa_status_t hsa_ven_amd_loader_query_executable( const void *device_address, hsa_executable_t *executable); +//===----------------------------------------------------------------------===// + +/** + * @brief Iterate over the loaded code objects in an executable, and invoke + * an application-defined callback on every iteration. + * + * @param[in] executable Executable. + * + * @param[in] callback Callback to be invoked once per loaded code object. The + * HSA runtime passes three arguments to the callback: the executable, a + * loaded code object, and the application data. If @p callback returns a + * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the + * traversal stops and ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that status + * value.
+ * + * @param[in] data Application data that is passed to @p callback on every + * iteration. May be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL. + */ +hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + hsa_executable_t executable, + hsa_status_t (*callback)( + hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, + void *data), + void *data); + +/** + * @brief Loaded code object kind. + */ +typedef enum { + /** + * Program code object. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1, + /** + * Agent code object. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2 +} hsa_ven_amd_loader_loaded_code_object_kind_t; + +/** + * @brief Loaded code object attributes. + */ +typedef enum hsa_ven_amd_loader_loaded_code_object_info_e { + /** + * The executable in which this loaded code object is loaded. The + * type of this attribute is ::hsa_executable_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1, + /** + * The kind of this loaded code object. The type of this attribute is + * ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2, + /** + * The agent on which this loaded code object is loaded. The + * value of this attribute is only defined if + * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is + * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this + * attribute is ::hsa_agent_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3, + /** + * The storage type of the code object reader used to load the loaded code object. 
+ * The type of this attribute is ::uint32_t interpreted as a + * ::hsa_ven_amd_loader_code_object_storage_type_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4, + /** + * The memory address of the first byte of the code object that was loaded. + * The value of this attribute is only defined if + * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is + * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this + * attribute is ::uint64_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5, + /** + * The memory size in bytes of the code object that was loaded. + * The value of this attribute is only defined if + * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is + * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this + * attribute is ::uint64_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6, + /** + * The file descriptor of the code object that was loaded. + * The value of this attribute is only defined if + * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is + * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this + * attribute is ::int. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7, + /** + * The signed byte address difference of the memory address at which the code + * object is loaded minus the virtual address specified in the code object + * that is loaded. The value of this attribute is only defined if the + * executable in which the code object is loaded is frozen. The type of this + * attribute is ::int64_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8, +/** + * The base memory address at which the code object is loaded. This is the + * base address of the allocation for the lowest addressed segment of the code + * object that is loaded.
Note that any non-loaded segments before the first + * loaded segment are ignored. The value of this attribute is only defined if + * the executable in which the code object is loaded is frozen. The type of + * this attribute is ::uint64_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9, + /** + * The byte size of the loaded code object's contiguous memory allocation. The + * value of this attribute is only defined if the executable in which the code + * object is loaded is frozen. The type of this attribute is ::uint64_t. + */ + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10 +} hsa_ven_amd_loader_loaded_code_object_info_t; + +/** + * @brief Get the current value of an attribute for a given loaded code + * object. + * + * @param[in] loaded_code_object Loaded code object. + * + * @param[in] attribute Attribute to query. + * + * @param[out] value Pointer to an application-allocated buffer where to store + * the value of the attribute. If the buffer passed by the application is not + * large enough to hold the value of @p attribute, the behavior is undefined. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is + * invalid. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid + * loaded code object attribute, or @p value is NULL. + */ +hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info( + hsa_loaded_code_object_t loaded_code_object, + hsa_ven_amd_loader_loaded_code_object_info_t attribute, + void *value); + +//===----------------------------------------------------------------------===// + /** * @brief Extension version. */ #define hsa_ven_amd_loader 001000 /** - * @brief Extension function table. + * @brief Extension function table version 1.00.
*/ typedef struct hsa_ven_amd_loader_1_00_pfn_s { hsa_status_t (*hsa_ven_amd_loader_query_host_address)( @@ -264,6 +420,36 @@ typedef struct hsa_ven_amd_loader_1_00_pfn_s { hsa_executable_t *executable); } hsa_ven_amd_loader_1_00_pfn_t; +/** + * @brief Extension function table version 1.01. + */ +typedef struct hsa_ven_amd_loader_1_01_pfn_s { + hsa_status_t (*hsa_ven_amd_loader_query_host_address)( + const void *device_address, + const void **host_address); + + hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors); + + hsa_status_t (*hsa_ven_amd_loader_query_executable)( + const void *device_address, + hsa_executable_t *executable); + + hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)( + hsa_executable_t executable, + hsa_status_t (*callback)( + hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, + void *data), + void *data); + + hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)( + hsa_loaded_code_object_t loaded_code_object, + hsa_ven_amd_loader_loaded_code_object_info_t attribute, + void *value); +} hsa_ven_amd_loader_1_01_pfn_t; + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index 8831c7d60a..7b88e0d4a3 100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -835,6 +835,7 @@ hsa_status_t ExecutableImpl::IterateProgramSymbols( hsa_status_t ExecutableImpl::IterateLoadedCodeObjects( hsa_status_t (*callback)( + hsa_executable_t executable, hsa_loaded_code_object_t loaded_code_object, void *data), void *data) @@ -843,7 +844,10 @@ hsa_status_t ExecutableImpl::IterateLoadedCodeObjects( assert(callback); for (auto &loaded_code_object : loaded_code_objects) { - hsa_status_t status = callback(LoadedCodeObject::Handle(loaded_code_object), data); + hsa_status_t 
status = callback( + Executable::Handle(this), + LoadedCodeObject::Handle(loaded_code_object), + data); if (status != HSA_STATUS_SUCCESS) { return status; } @@ -890,6 +894,40 @@ size_t ExecutableImpl::QuerySegmentDescriptors( return i - first_empty_segment_descriptor; } +hsa_agent_t LoadedCodeObjectImpl::getAgent() const { + assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return loaded_segments.front()->Agent(); +} +hsa_executable_t LoadedCodeObjectImpl::getExecutable() const { + assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return Executable::Handle(loaded_segments.front()->Owner()); +} +uint64_t LoadedCodeObjectImpl::getElfData() const { + return reinterpret_cast(elf_data); +} +uint64_t LoadedCodeObjectImpl::getElfSize() const { + return (uint64_t)elf_size; +} +uint64_t LoadedCodeObjectImpl::getStorageOffset() const { + assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return (uint64_t)loaded_segments.front()->StorageOffset(); +} +uint64_t LoadedCodeObjectImpl::getLoadBase() const { + // TODO Add support for code objects with 0 segments. + assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return reinterpret_cast(loaded_segments.front()->Address(0)); +} +uint64_t LoadedCodeObjectImpl::getLoadSize() const { + // TODO Add support for code objects with 0 or >1 segments. + assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return (uint64_t)loaded_segments.front()->Size(); +} +int64_t LoadedCodeObjectImpl::getDelta() const { + // TODO Add support for code objects with 0 segments. 
+ assert(loaded_segments.size() == 1 && "Only supports code objects v2+"); + return getLoadBase() - loaded_segments.front()->VAddr(); +} + hsa_executable_t AmdHsaCodeLoader::FindExecutable(uint64_t device_address) { hsa_executable_t execHandle = {0}; @@ -1159,7 +1197,7 @@ hsa_status_t ExecutableImpl::LoadSegmentsV2(hsa_agent_t agent, if (!ptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; Segment *load_segment = new Segment(this, agent, AMDGPU_HSA_SEGMENT_CODE_AGENT, - ptr, size, vaddr, 0); + ptr, size, vaddr, c->DataSegment(0)->offset()); if (!load_segment) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; hsa_status_t status = HSA_STATUS_SUCCESS; diff --git a/runtime/hsa-runtime/loader/executable.hpp b/runtime/hsa-runtime/loader/executable.hpp index d955f41696..3f90e276f1 100644 --- a/runtime/hsa-runtime/loader/executable.hpp +++ b/runtime/hsa-runtime/loader/executable.hpp @@ -277,6 +277,15 @@ public: void Print(std::ostream& out) override; void Destroy() override {} + + hsa_agent_t getAgent() const override; + hsa_executable_t getExecutable() const override; + uint64_t getElfData() const override; + uint64_t getElfSize() const override; + uint64_t getStorageOffset() const override; + uint64_t getLoadBase() const override; + uint64_t getLoadSize() const override; + int64_t getDelta() const override; }; class Segment : public LoadedSegment, public ExecutableObject { @@ -432,6 +441,7 @@ public: hsa_status_t IterateLoadedCodeObjects( hsa_status_t (*callback)( + hsa_executable_t executable, hsa_loaded_code_object_t loaded_code_object, void *data), void *data) override;