PC Sampling: Add start, stop, and flush APIs

Add the PC Sampling start and stop APIs, and add a stub for the flush
API.

Change-Id: I7a093b29dc87e34ac06faaae6cac2be50e4663e1


[ROCm/ROCR-Runtime commit: a842247482]
Этот коммит содержится в:
David Yat Sin
2023-09-23 15:58:13 +00:00
родитель 566e2c60fd
Коммит bcdecc7ff4
7 изменённых файлов: 161 добавлений и 3 удалений
+13
Просмотреть файл
@@ -199,6 +199,12 @@ class GpuAgentInt : public core::Agent {
pcs::PcsRuntime::PcSamplingSession& session) = 0;
virtual hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) = 0;
virtual hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) = 0;
virtual hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) = 0;
virtual hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) = 0;
};
class GpuAgent : public GpuAgentInt {
@@ -485,6 +491,12 @@ class GpuAgent : public GpuAgentInt {
hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
pcs::PcsRuntime::PcSamplingSession& session);
hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session);
hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session);
hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session);
hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session);
static void PcSamplingThreadRun(void* agent);
void PcSamplingThread();
// @brief Node properties.
const HsaNodeProperties properties_;
@@ -691,6 +703,7 @@ class GpuAgent : public GpuAgentInt {
/* PC Sampling fields - begin */
// Per-agent bookkeeping for host-trap PC sampling.
struct pcs_hosttrap_t {
  // Handle of the worker thread created by PcSamplingStart.
  os::Thread thread;
  // Session whose isActive() state the worker thread polls.
  pcs::PcsRuntime::PcSamplingSession* session;
};
+76
Просмотреть файл
@@ -2400,6 +2400,8 @@ hsa_status_t GpuAgent::PcSamplingCreateFromId(HsaPcSamplingTraceId ioctlId,
}
hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) {
if (PcSamplingStop(session) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;
pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
HSAKMT_STATUS retKmt = hsaKmtPcSamplingDestroy(node_id(), session.ThunkId());
ht_data.session = NULL;
@@ -2407,5 +2409,79 @@ hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& ses
return (retKmt == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
/// @brief Start PC sampling for @p session on this agent.
///
/// For host-trap sessions the agent's host-trap session is marked active and
/// a worker thread running PcSamplingThread is created before the kernel
/// driver is asked to start sampling. If the driver call fails, the worker
/// thread is stopped and joined again.
///
/// @param session Session to start.
/// @return HSA_STATUS_SUCCESS if the session is already active or was started;
///         HSA_STATUS_ERROR_RESOURCE_BUSY if a host-trap session is already
///         active on this agent; HSA_STATUS_ERROR if no host-trap session is
///         registered or the driver refuses to start.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR_OUT_OF_RESOURCES) if the
///         worker thread cannot be created.
hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) {
  // Starting an already active session is a no-op.
  if (session.isActive()) return HSA_STATUS_SUCCESS;

  pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
  const auto method = session.method();
  const bool is_hosttrap = (method == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1);

  if (is_hosttrap) {
    // PcSamplingDestroy resets this pointer to NULL, so never dereference it
    // unchecked.
    if (ht_data.session == nullptr) return HSA_STATUS_ERROR;

    if (ht_data.session->isActive()) {
      debug_warning("Already have a Host trap session in progress!");
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    }

    // Mark the session active before the thread exists: PcSamplingThread
    // exits as soon as it observes an inactive session.
    ht_data.session->start();

    // This thread will handle all hosttrap sessions on this agent.
    // In the future, there will be another thread to handle stochastic sessions.
    ht_data.thread = os::CreateThread(PcSamplingThreadRun, static_cast<void*>(this));
    if (!ht_data.thread) {
      // Roll back the active flag so a failed start does not leave the session
      // looking active with no worker thread behind it.
      ht_data.session->stop();
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Failed to start PC Sampling thread.");
    }
  }
  // NOTE(review): sessions using a non-host-trap method are never marked
  // active on this path - confirm whether that is intended.

  if (hsaKmtPcSamplingStart(node_id(), session.ThunkId()) == HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_SUCCESS;

  debug_print("Failed to start PC sampling session with thunkId:%d\n", session.ThunkId());

  if (is_hosttrap) {
    // Undo the host-side setup: let the worker thread exit, then reap it.
    ht_data.session->stop();
    os::WaitForThread(ht_data.thread);
    os::CloseThread(ht_data.thread);
    ht_data.thread = NULL;
  }
  return HSA_STATUS_ERROR;
}
/// @brief Stop PC sampling for @p session on this agent.
///
/// Marks the session inactive, asks the kernel driver to stop sampling and,
/// for host-trap sessions, joins and closes the worker thread.
///
/// @param session Session to stop.
/// @return HSA_STATUS_SUCCESS if the session was not active or was stopped.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR) if the driver fails to stop
///         the session. The worker thread is reaped before the exception is
///         raised.
hsa_status_t GpuAgent::PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) {
  // Stopping an inactive session is a no-op.
  if (!session.isActive()) return HSA_STATUS_SUCCESS;

  pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;

  // Clear the active flag first so the worker thread's polling loop can end.
  // NOTE(review): the worker polls ht_data.session, presumably the same object
  // as `session` for host-trap sessions - confirm.
  session.stop();

  const HSAKMT_STATUS retKmt = hsaKmtPcSamplingStop(node_id(), session.ThunkId());

  // Reap the worker thread regardless of the driver result; throwing first
  // would leak the thread handle and leave a stale value in ht_data.thread.
  if (session.method() == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1 && ht_data.thread) {
    os::WaitForThread(ht_data.thread);
    os::CloseThread(ht_data.thread);
    ht_data.thread = NULL;
  }

  if (retKmt != HSAKMT_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to stop PC Sampling session.");

  return HSA_STATUS_SUCCESS;
}
void GpuAgent::PcSamplingThread() {
pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
while (ht_data.session->isActive()) {
// Implement code to read data from 2nd level trap handler here
sleep(1);
}
debug_print("PcSamplingThread::Exiting\n");
}
void GpuAgent::PcSamplingThreadRun(void* _agent) {
GpuAgent* agent = (GpuAgent*)_agent;
agent->PcSamplingThread();
debug_print("PcSamplingThread exiting...");
}
/// @brief Flush pending PC sampling data for @p session.
///
/// Currently a stub: the session is not inspected and success is always
/// reported.
/// @param session Session to flush (unused for now).
/// @return HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) {
  // TODO: implement me
  static_cast<void>(session);
  return HSA_STATUS_SUCCESS;
}
} // namespace amd
} // namespace rocr
+3
Просмотреть файл
@@ -256,6 +256,9 @@ global:
hsa_ven_amd_pcs_create;
hsa_ven_amd_pcs_create_from_id;
hsa_ven_amd_pcs_destroy;
hsa_ven_amd_pcs_start;
hsa_ven_amd_pcs_stop;
hsa_ven_amd_pcs_flush;
local:
*;
+18
Просмотреть файл
@@ -134,6 +134,24 @@ hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t handle) {
CATCH;
}
// API entry point: forwards the start request for `handle` to the
// PcsRuntime singleton, inside the file's TRY/CATCH wrappers.
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingStart(handle);
  return status;
  CATCH;
}
// API entry point: forwards the stop request for `handle` to the
// PcsRuntime singleton, inside the file's TRY/CATCH wrappers.
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingStop(handle);
  return status;
  CATCH;
}
// API entry point: forwards the flush request for `handle` to the
// PcsRuntime singleton, inside the file's TRY/CATCH wrappers.
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingFlush(handle);
  return status;
  CATCH;
}
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api) {
pcs_api->hsa_ven_amd_pcs_iterate_configuration_fn = hsa_ven_amd_pcs_iterate_configuration;
pcs_api->hsa_ven_amd_pcs_create_fn = hsa_ven_amd_pcs_create;
+6
Просмотреть файл
@@ -73,6 +73,12 @@ hsa_status_t hsa_ven_amd_pcs_create_from_id(
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
// Update Api table with func pointers that implement functionality
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api);
+36 -1
Просмотреть файл
@@ -104,7 +104,7 @@ PcsRuntime::PcSamplingSession::PcSamplingSession(
core::Agent* _agent, hsa_ven_amd_pcs_method_kind_t method, hsa_ven_amd_pcs_units_t units,
size_t interval, size_t latency, size_t buffer_size,
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data)
: agent(_agent), thunkId_(0), valid_(true), sample_size_(0) {
: agent(_agent), thunkId_(0), active_(false), valid_(true), sample_size_(0) {
switch (method) {
case HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1:
sample_size_ = sizeof(perf_sample_hosttrap_v1_t);
@@ -240,6 +240,41 @@ hsa_status_t PcsRuntime::PcSamplingDestroy(hsa_ven_amd_pcs_t handle) {
return ret;
}
/// @brief Look up the session registered under @p handle and start it on its
///        owning GPU agent.
/// @param handle Session handle; its value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle,
///         otherwise the status returned by the agent's PcSamplingStart.
hsa_status_t PcsRuntime::PcSamplingStart(hsa_ven_amd_pcs_t handle) {
  // Hold the session-table lock for the whole lookup and the agent call.
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  auto entry = pc_sampling_.find(reinterpret_cast<uint64_t>(handle.handle));
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingStart(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Look up the session registered under @p handle and stop it on its
///        owning GPU agent.
/// @param handle Session handle; its value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle,
///         otherwise the status returned by the agent's PcSamplingStop.
hsa_status_t PcsRuntime::PcSamplingStop(hsa_ven_amd_pcs_t handle) {
  // Hold the session-table lock for the whole lookup and the agent call.
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  auto entry = pc_sampling_.find(reinterpret_cast<uint64_t>(handle.handle));
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingStop(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Look up the session registered under @p handle and flush it on its
///        owning GPU agent.
/// @param handle Session handle; its value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle,
///         otherwise the status returned by the agent's PcSamplingFlush.
hsa_status_t PcsRuntime::PcSamplingFlush(hsa_ven_amd_pcs_t handle) {
  // Hold the session-table lock for the whole lookup and the agent call.
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  auto entry = pc_sampling_.find(reinterpret_cast<uint64_t>(handle.handle));
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingFlush(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
} // namespace pcs
} // namespace rocr
+9 -2
Просмотреть файл
@@ -70,7 +70,7 @@ class PcsRuntime {
class PcSamplingSession {
public:
PcSamplingSession() : agent(NULL), thunkId_(0){};
PcSamplingSession() : agent(NULL), thunkId_(0), active_(false){};
PcSamplingSession(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
size_t buffer_size, hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
@@ -88,11 +88,15 @@ class PcsRuntime {
core::Agent* agent;
void SetThunkId(HsaPcSamplingTraceId thunkId) { thunkId_ = thunkId; }
HsaPcSamplingTraceId ThunkId() { return thunkId_; }
bool isActive() { return active_; }
void start() { active_ = true; }
void stop() { active_ = false; }
private:
HsaPcSamplingTraceId thunkId_;
bool valid_; // Whether configuration parameters are valid
bool active_; // Set to true when the session is started
bool valid_; // Whether configuration parameters are valid
size_t sample_size_;
struct client_session_data_t {
@@ -126,6 +130,9 @@ class PcsRuntime {
void* client_cb_data, hsa_ven_amd_pcs_t* handle);
hsa_status_t PcSamplingDestroy(hsa_ven_amd_pcs_t handle);
hsa_status_t PcSamplingStart(hsa_ven_amd_pcs_t handle);
hsa_status_t PcSamplingStop(hsa_ven_amd_pcs_t handle);
hsa_status_t PcSamplingFlush(hsa_ven_amd_pcs_t handle);
private:
/// @brief Initialize singleton object, must be called once.