Fichiers
Evgeny 7c4369bde4 Initial Commit
Contributors:
Ammar ELWazir <aelwazir@amd.com>
AravindanC <aravindan.cheruvally@amd.com>
Benjamin Welton <bewelton@amd.com>
Ma, Bing <Bing.Ma@amd.com>
Chun Yang <chun.yang@amd.com>
Cole Nelson <cole.nelson@amd.com>
Ethan Stewart <ethan.stewart@amd.com>
Evgeny <evgeny.shcherbakov@amd.com>
Freddy Paul <Freddy.paul@amd.com>
Giovanni Baraldi <gbaraldi@amd.com>
Gopesh Bhardwaj <Gopesh.Bhardwaj@amd.com>
Icarus Sparry <icarus.sparry@amd.com>
itrowbri <Ian.Trowbridge@amd.com>
James Edwards <JamesAdrian.Edwards@amd.com>
jatang <jatang@amd.com>
Jeremy Newton <Jeremy.Newton@amd.com>
Jonathan Kim <jonathan.kim@amd.com>
Kent Russell <kent.russell@amd.com>
Kiumars Sabeti <kiumars.sabeti@amd.com>
Lang Yu <lang.yu@amd.com>
Laurent Morichetti <laurent.morichetti@amd.com>
Mallya, Ameya Keshava <AmeyaKeshava.Mallya@amd.com>
Manjunath Jakaraddi <manjunath.jakaraddi@amd.com>
Mark Laws <markdavid.laws@amd.com>
Mohan Kumar Mithur <Mohan.KumarMithur@amd.com>
Nicholas Curtis <nicurtis@amd.com>
Nirmal Unnikrishnan <Nirmal.Unnikrishnan@amd.com>
Parag Bhandari <parag.bhandari@amd.com>
Ranjith Ramakrishnan <Ranjith.Ramakrishnan@amd.com>
Robert Gregory <Robert.Gregory@amd.com>
Saravanan Solaiyappan <saravanan.solaiyappan@amd.com>
Saurabh Verma <saurabh.verma@amd.com>
Srihari Uttanur <srihari.u@amd.com>
Srinivasan Subramanian <srinivasan.subramanian@amd.com>
Sriraksha Nagaraj <Sriraksha.Nagaraj@amd.com>
Sushma Vaddireddy <svaddire@amd.com>
Xianwei Zhang <Xianwei.Zhang@amd.com>


[ROCm/aqlprofile commit: 1ed169e30c]
2025-05-28 10:10:47 -05:00

178 lignes
4.7 KiB
C++

// MIT License
//
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "util/perf_timer.h"
// Constructs the timer collection and calibrates the TSC frequency.
//
// The value produced by MeasureTSCFreqHz() is cached in freq_in_100mhz_;
// StopTimer() uses it to convert TSC ticks to milliseconds in _AMD builds.
// Note that the calibration spins until about 1e9 TSC ticks have elapsed.
PerfTimer::PerfTimer() {
  const auto measuredFreq = MeasureTSCFreqHz();
  freq_in_100mhz_ = measuredFreq;
}
// Releases every Timer record that CreateTimer() allocated.
PerfTimer::~PerfTimer() {
  // Free the records from the most recently created one back to the first,
  // then drop the (now dangling) pointers from the container.
  for (auto it = timers_.rbegin(); it != timers_.rend(); ++it) {
    delete *it;
  }
  timers_.clear();
}
// New cretaed timer instantance index will be returned
int PerfTimer::CreateTimer() {
Timer* newTimer = new Timer;
newTimer->start = 0;
newTimer->clocks = 0;
#ifdef _WIN32
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq);
#else
newTimer->freq = (long long)1.0E3;
#endif
/* Push back the address of new Timer instance created */
timers_.push_back(newTimer);
return (int)(timers_.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
// General Windows timing method
#ifndef _AMD
long long tmpStart;
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
timers_[index]->start = (double)tmpStart;
#else
// AMD Windows timing method
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
#else
// AMD timing method
unsigned int unused;
timers_[index]->start = __rdtscp(&unused);
#endif
#endif
return SUCCESS;
}
int PerfTimer::StopTimer(int index) {
double n = 0;
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
#ifndef _AMD
long long n1;
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
n = (double)n1;
#else
// AMD Window Timing
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3);
#else
// AMD Linux timing
unsigned int unused;
n = __rdtscp(&unused);
#endif
#endif
n -= timers_[index]->start;
timers_[index]->start = 0;
#ifndef _AMD
timers_[index]->clocks += n;
#else
// timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns
timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms
#endif
return SUCCESS;
}
void PerfTimer::Error(std::string str) { std::cout << str << std::endl; }
// Returns the accumulated reading of the timer `index`: its `clocks` total
// divided by its `freq` divisor.
//
// index:   handle previously returned by CreateTimer().
// returns: clocks / freq as a double. For an invalid handle the error is
//          reported through Error() and FAILURE is returned converted to
//          double, which callers cannot tell apart from a genuine reading of
//          the same value.
double PerfTimer::ReadTimer(int index) {
  // Reject negative handles too: they would otherwise index timers_ out of
  // bounds.
  if (index < 0 || index >= static_cast<int>(timers_.size())) {
    Error("Cannot read timer. Invalid handle.");
    return FAILURE;
  }
  const double accumulated = static_cast<double>(timers_[index]->clocks);
  return static_cast<double>(accumulated / timers_[index]->freq);
}
// Returns a coarse timestamp in microseconds.
//
// Windows: the performance counter converted to microseconds.
// Elsewhere: CLOCK_MONOTONIC_RAW converted to microseconds.
uint64_t PerfTimer::CoarseTimestampUs() {
#ifdef _WIN32
  uint64_t freqHz;
  uint64_t ticks;
  QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz);
  QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
  // Convert whole seconds and the sub-second remainder separately. This is
  // exactly floor(ticks * 1e6 / freqHz) without the 64-bit overflow of the
  // direct product. Unlike scaling ticks and freqHz down together, it loses
  // no precision and never reduces the divisor towards zero.
  // The remainder product fits in 64 bits as long as freqHz < 2^44.
  const uint64_t usPerSecond = 1000000;
  const uint64_t wholeSeconds = ticks / freqHz;
  const uint64_t remainderTicks = ticks % freqHz;
  return wholeSeconds * usPerSecond + (remainderTicks * usPerSecond) / freqHz;
#else
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
  return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
#endif
}
// Estimates the TSC frequency by timing a busy-wait of about 1e9 TSC ticks
// against CoarseTimestampUs().
//
// returns: the frequency expressed in units of 100 MHz, rounded to the
//          nearest unit (ticks * 10 / elapsed nanoseconds). Despite the
//          function name, the value is not in Hz.
uint64_t PerfTimer::MeasureTSCFreqHz() {
  const uint64_t calibrationTicks = 1000000000;
  unsigned int aux;  // out-parameter required by __rdtscp; value is unused

  const uint64_t wallStartUs = CoarseTimestampUs();
  const uint64_t tscStart = __rdtscp(&aux);

  // Spin until the TSC has advanced by the calibration amount.
  uint64_t tscNow = __rdtscp(&aux);
  while (tscNow - tscStart < calibrationTicks) {
    tscNow = __rdtscp(&aux);
  }
  const uint64_t wallStopUs = CoarseTimestampUs();

  const uint64_t elapsedNs = (wallStopUs - wallStartUs) * 1000;
  const uint64_t elapsedTicks = tscNow - tscStart;
  // Adding half the divisor before dividing rounds to the nearest integer.
  const uint64_t halfDivisor = elapsedNs / 2;
  return (elapsedTicks * 10 + halfDivisor) / elapsedNs;
}