PC Sampling: Add start, stop, and flush APIs
Create the PC Sampling APIs for the start and stop functions, and add a
stub for the flush function.
Change-Id: I7a093b29dc87e34ac06faaae6cac2be50e4663e1
[ROCm/ROCR-Runtime commit: a842247482]
Этот коммит содержится в:
@@ -199,6 +199,12 @@ class GpuAgentInt : public core::Agent {
|
||||
pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
|
||||
virtual hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) = 0;
|
||||
};
|
||||
|
||||
class GpuAgent : public GpuAgentInt {
|
||||
@@ -485,6 +491,12 @@ class GpuAgent : public GpuAgentInt {
|
||||
hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
|
||||
pcs::PcsRuntime::PcSamplingSession& session);
|
||||
hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session);
|
||||
hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session);
|
||||
hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session);
|
||||
hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session);
|
||||
|
||||
static void PcSamplingThreadRun(void* agent);
|
||||
void PcSamplingThread();
|
||||
|
||||
// @brief Node properties.
|
||||
const HsaNodeProperties properties_;
|
||||
@@ -691,6 +703,7 @@ class GpuAgent : public GpuAgentInt {
|
||||
|
||||
/* PC Sampling fields - begin */
|
||||
typedef struct {
|
||||
os::Thread thread;
|
||||
pcs::PcsRuntime::PcSamplingSession* session;
|
||||
} pcs_hosttrap_t;
|
||||
|
||||
|
||||
@@ -2400,6 +2400,8 @@ hsa_status_t GpuAgent::PcSamplingCreateFromId(HsaPcSamplingTraceId ioctlId,
|
||||
}
|
||||
|
||||
hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) {
|
||||
if (PcSamplingStop(session) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;
|
||||
|
||||
pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
|
||||
HSAKMT_STATUS retKmt = hsaKmtPcSamplingDestroy(node_id(), session.ThunkId());
|
||||
ht_data.session = NULL;
|
||||
@@ -2407,5 +2409,79 @@ hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& ses
|
||||
return (retKmt == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
/// @brief Start a PC sampling session on this agent.
///
/// For host-trap sessions this marks the agent's host-trap session active and
/// spawns the worker thread (PcSamplingThreadRun) before asking the thunk to
/// start sampling. If the thunk call fails, the worker is stopped and joined
/// again so no thread is left behind.
///
/// @param session Session to start.
/// @return HSA_STATUS_SUCCESS if the session is already active or was started;
///         HSA_STATUS_ERROR_RESOURCE_BUSY if a host-trap session is already
///         running on this agent; HSA_STATUS_ERROR if no host-trap session is
///         registered or the thunk refuses to start sampling.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR_OUT_OF_RESOURCES) if the
///         worker thread cannot be created.
hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) {
  // Starting an already running session is a no-op.
  if (session.isActive()) return HSA_STATUS_SUCCESS;

  pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
  const bool is_hosttrap = (session.method() == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1);

  if (is_hosttrap) {
    // PcSamplingDestroy() resets the pointer to null, so it must be checked
    // before it is dereferenced.
    if (ht_data.session == nullptr) {
      debug_print("No host trap session is registered on this agent\n");
      return HSA_STATUS_ERROR;
    }

    if (ht_data.session->isActive()) {
      // Same "false && message" form as the other debug_warning call sites.
      debug_warning(false && "Already have a Host trap session in progress!");
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    }

    // The worker loops while the session is active, so the flag has to be set
    // before the thread is created.
    ht_data.session->start();

    // This thread will handle all hosttrap sessions on this agent
    // In the future, there will be another thread to handle stochastic sessions.
    ht_data.thread = os::CreateThread(PcSamplingThreadRun, static_cast<void*>(this));
    if (!ht_data.thread) {
      // Roll the flag back so a failed start does not leave the session
      // reported as active.
      ht_data.session->stop();
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Failed to start PC Sampling thread.");
    }
  }

  if (hsaKmtPcSamplingStart(node_id(), session.ThunkId()) == HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_SUCCESS;

  debug_print("Failed to start PC sampling session with thunkId:%d\n", session.ThunkId());

  if (is_hosttrap) {
    // Undo the host-trap setup: clear the flag so the worker leaves its loop,
    // then join and release it.
    ht_data.session->stop();
    if (ht_data.thread) {
      os::WaitForThread(ht_data.thread);
      os::CloseThread(ht_data.thread);
      ht_data.thread = nullptr;
    }
  }

  return HSA_STATUS_ERROR;
}
|
||||
|
||||
/// @brief Stop a PC sampling session on this agent.
///
/// Clears the session's active flag, asks the thunk to stop sampling and, for
/// host-trap sessions, joins and releases the worker thread. The worker is
/// always joined before a thunk failure is reported, so a failed stop does not
/// leave a stale thread handle behind.
///
/// @param session Session to stop.
/// @return HSA_STATUS_SUCCESS if the session was not active or was stopped.
/// @throws AMD::hsa_exception (HSA_STATUS_ERROR) if the thunk fails to stop
///         the session.
hsa_status_t GpuAgent::PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) {
  // Stopping a session that is not running is a no-op.
  if (!session.isActive()) return HSA_STATUS_SUCCESS;

  pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;

  // Clearing the flag is what lets the worker thread leave its polling loop.
  session.stop();

  const HSAKMT_STATUS retKmt = hsaKmtPcSamplingStop(node_id(), session.ThunkId());

  // Reap the worker regardless of the thunk result.
  if (session.method() == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1 && ht_data.thread) {
    os::WaitForThread(ht_data.thread);
    os::CloseThread(ht_data.thread);
    ht_data.thread = nullptr;
  }

  if (retKmt != HSAKMT_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to stop PC Sampling session.");

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void GpuAgent::PcSamplingThread() {
|
||||
pcs_hosttrap_t& ht_data = pcs_hosttrap_data_;
|
||||
while (ht_data.session->isActive()) {
|
||||
// Implement code to read data from 2nd level trap handler here
|
||||
sleep(1);
|
||||
}
|
||||
debug_print("PcSamplingThread::Exiting\n");
|
||||
}
|
||||
|
||||
void GpuAgent::PcSamplingThreadRun(void* _agent) {
|
||||
GpuAgent* agent = (GpuAgent*)_agent;
|
||||
agent->PcSamplingThread();
|
||||
debug_print("PcSamplingThread exiting...");
|
||||
}
|
||||
|
||||
/// @brief Flush a PC sampling session (stub).
///
/// Not implemented yet: the session is ignored and success is reported.
///
/// @param session Session to flush (currently unused).
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) {
  // TODO: implement me
  (void)session;
  const hsa_status_t status = HSA_STATUS_SUCCESS;
  return status;
}
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
@@ -256,6 +256,9 @@ global:
|
||||
hsa_ven_amd_pcs_create;
|
||||
hsa_ven_amd_pcs_create_from_id;
|
||||
hsa_ven_amd_pcs_destroy;
|
||||
hsa_ven_amd_pcs_start;
|
||||
hsa_ven_amd_pcs_stop;
|
||||
hsa_ven_amd_pcs_flush;
|
||||
|
||||
local:
|
||||
*;
|
||||
|
||||
@@ -134,6 +134,24 @@ hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t handle) {
|
||||
CATCH;
|
||||
}
|
||||
|
||||
/// @brief API entry point: start the PC sampling session identified by handle.
///
/// Forwards to PcsRuntime::PcSamplingStart(). TRY/CATCH are project macros
/// defined elsewhere (presumably translating exceptions into a status code).
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingStart(handle);
  return status;
  CATCH;
}
|
||||
|
||||
/// @brief API entry point: stop the PC sampling session identified by handle.
///
/// Forwards to PcsRuntime::PcSamplingStop(). TRY/CATCH are project macros
/// defined elsewhere (presumably translating exceptions into a status code).
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingStop(handle);
  return status;
  CATCH;
}
|
||||
|
||||
/// @brief API entry point: flush the PC sampling session identified by handle.
///
/// Forwards to PcsRuntime::PcSamplingFlush(). TRY/CATCH are project macros
/// defined elsewhere (presumably translating exceptions into a status code).
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t handle) {
  TRY;
  const hsa_status_t status = PcsRuntime::instance()->PcSamplingFlush(handle);
  return status;
  CATCH;
}
|
||||
|
||||
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api) {
|
||||
pcs_api->hsa_ven_amd_pcs_iterate_configuration_fn = hsa_ven_amd_pcs_iterate_configuration;
|
||||
pcs_api->hsa_ven_amd_pcs_create_fn = hsa_ven_amd_pcs_create;
|
||||
|
||||
@@ -73,6 +73,12 @@ hsa_status_t hsa_ven_amd_pcs_create_from_id(
|
||||
|
||||
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
// Update Api table with func pointers that implement functionality
|
||||
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api);
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ PcsRuntime::PcSamplingSession::PcSamplingSession(
|
||||
core::Agent* _agent, hsa_ven_amd_pcs_method_kind_t method, hsa_ven_amd_pcs_units_t units,
|
||||
size_t interval, size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data)
|
||||
: agent(_agent), thunkId_(0), valid_(true), sample_size_(0) {
|
||||
: agent(_agent), thunkId_(0), active_(false), valid_(true), sample_size_(0) {
|
||||
switch (method) {
|
||||
case HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1:
|
||||
sample_size_ = sizeof(perf_sample_hosttrap_v1_t);
|
||||
@@ -240,6 +240,41 @@ hsa_status_t PcsRuntime::PcSamplingDestroy(hsa_ven_amd_pcs_t handle) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// @brief Look up the session for @p handle and start it on its owning agent.
///
/// The session table is searched while pc_sampling_lock_ is held, and the lock
/// stays held for the duration of the agent call.
///
/// @param handle PC sampling handle whose value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle;
///         otherwise whatever the agent's PcSamplingStart() returns.
hsa_status_t PcsRuntime::PcSamplingStart(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> guard(&pc_sampling_lock_);

  const auto key = reinterpret_cast<uint64_t>(handle.handle);
  auto entry = pc_sampling_.find(key);
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingStart(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
/// @brief Look up the session for @p handle and stop it on its owning agent.
///
/// The session table is searched while pc_sampling_lock_ is held, and the lock
/// stays held for the duration of the agent call.
///
/// @param handle PC sampling handle whose value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle;
///         otherwise whatever the agent's PcSamplingStop() returns.
hsa_status_t PcsRuntime::PcSamplingStop(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> guard(&pc_sampling_lock_);

  const auto key = reinterpret_cast<uint64_t>(handle.handle);
  auto entry = pc_sampling_.find(key);
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingStop(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
/// @brief Look up the session for @p handle and flush it on its owning agent.
///
/// The session table is searched while pc_sampling_lock_ is held, and the lock
/// stays held for the duration of the agent call.
///
/// @param handle PC sampling handle whose value is the key into pc_sampling_.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if no session matches the handle;
///         otherwise whatever the agent's PcSamplingFlush() returns.
hsa_status_t PcsRuntime::PcSamplingFlush(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> guard(&pc_sampling_lock_);

  const auto key = reinterpret_cast<uint64_t>(handle.handle);
  auto entry = pc_sampling_.find(key);
  if (entry != pc_sampling_.end()) {
    auto& session = entry->second;
    auto* owner = static_cast<AMD::GpuAgentInt*>(session.agent);
    return owner->PcSamplingFlush(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
|
||||
|
||||
} // namespace pcs
|
||||
} // namespace rocr
|
||||
|
||||
@@ -70,7 +70,7 @@ class PcsRuntime {
|
||||
|
||||
class PcSamplingSession {
|
||||
public:
|
||||
PcSamplingSession() : agent(NULL), thunkId_(0){};
|
||||
PcSamplingSession() : agent(NULL), thunkId_(0), active_(false){};
|
||||
PcSamplingSession(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
|
||||
size_t buffer_size, hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
||||
@@ -88,11 +88,15 @@ class PcsRuntime {
|
||||
core::Agent* agent;
|
||||
void SetThunkId(HsaPcSamplingTraceId thunkId) { thunkId_ = thunkId; }
|
||||
HsaPcSamplingTraceId ThunkId() { return thunkId_; }
|
||||
bool isActive() { return active_; }
|
||||
void start() { active_ = true; }
|
||||
void stop() { active_ = false; }
|
||||
|
||||
private:
|
||||
HsaPcSamplingTraceId thunkId_;
|
||||
|
||||
bool valid_; // Whether configuration parameters are valid
|
||||
bool active_; // Set to true when the session is started
|
||||
bool valid_; // Whether configuration parameters are valid
|
||||
size_t sample_size_;
|
||||
|
||||
struct client_session_data_t {
|
||||
@@ -126,6 +130,9 @@ class PcsRuntime {
|
||||
void* client_cb_data, hsa_ven_amd_pcs_t* handle);
|
||||
|
||||
hsa_status_t PcSamplingDestroy(hsa_ven_amd_pcs_t handle);
|
||||
hsa_status_t PcSamplingStart(hsa_ven_amd_pcs_t handle);
|
||||
hsa_status_t PcSamplingStop(hsa_ven_amd_pcs_t handle);
|
||||
hsa_status_t PcSamplingFlush(hsa_ven_amd_pcs_t handle);
|
||||
|
||||
private:
|
||||
/// @brief Initialize singleton object, must be called once.
|
||||
|
||||
Ссылка в новой задаче
Block a user