8dd0ed0240
SWDEV-147340 - Added function UpdateAppPowerProfile() that notices KMD to update applicaiton profile. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#82 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#43 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#49 edit
452 wiersze
13 KiB
C++
452 wiersze
13 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#ifndef OS_HPP_
|
|
#define OS_HPP_
|
|
|
|
#include "top.hpp"
|
|
#include "utils/util.hpp"
|
|
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
#if defined(__linux__)
|
|
#include <sched.h>
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#include <Basetsd.h> // For KAFFINITY
|
|
#endif // _WIN32
|
|
|
|
// Smallest supported VM page size.
|
|
#define MIN_PAGE_SHIFT 12
|
|
#define MIN_PAGE_SIZE (1UL << MIN_PAGE_SHIFT)
|
|
|
|
namespace amd {
|
|
|
|
/*! \addtogroup Os Operating System Abstraction
|
|
*
|
|
* \copydoc amd::Os
|
|
*
|
|
* @{
|
|
*/
|
|
|
|
class Thread; // For Os::createOsThread()
|
|
|
|
class Os : AllStatic {
|
|
public:
|
|
enum MemProt { MEM_PROT_NONE = 0, MEM_PROT_READ, MEM_PROT_RW, MEM_PROT_RWX };
|
|
|
|
class ThreadAffinityMask {
|
|
friend class Os;
|
|
|
|
private:
|
|
#if defined(__linux__)
|
|
cpu_set_t mask_;
|
|
#else // _WIN32
|
|
#if !defined(_WIN32)
|
|
typedef uint KAFFINITY;
|
|
#endif
|
|
KAFFINITY mask_[512 / sizeof(KAFFINITY)];
|
|
#endif
|
|
|
|
public:
|
|
ThreadAffinityMask() { init(); }
|
|
|
|
inline void init();
|
|
inline void set(uint cpu);
|
|
inline void clear(uint cpu);
|
|
inline bool isSet(uint cpu) const;
|
|
inline bool isEmpty() const;
|
|
inline uint countSet() const;
|
|
|
|
inline uint getFirstSet() const;
|
|
inline uint getNextSet(uint cpu) const;
|
|
|
|
#if defined(__linux__)
|
|
inline void set(const cpu_set_t& mask);
|
|
inline void clear(const cpu_set_t& mask);
|
|
inline void adjust(cpu_set_t& mask) const;
|
|
inline cpu_set_t& getNative() { return mask_; }
|
|
#else
|
|
inline void set(size_t group, KAFFINITY affinity);
|
|
inline void adjust(size_t group, KAFFINITY& affinity) const;
|
|
#endif
|
|
};
|
|
|
|
private:
|
|
static const size_t FILE_PATH_MAX_LENGTH = 1024;
|
|
|
|
static size_t pageSize_; //!< The default os page size.
|
|
static int processorCount_; //!< The number of active processors.
|
|
|
|
private:
|
|
//! Load the shared library named by \a filename
|
|
static void* loadLibrary_(const char* filename);
|
|
|
|
public:
|
|
//! Initialize the Os package.
|
|
static bool init();
|
|
//! Tear down the Os package.
|
|
static void tearDown();
|
|
|
|
// Topology helper routines:
|
|
//
|
|
|
|
//! Return the number of active processors in the system.
|
|
inline static int processorCount();
|
|
|
|
#if defined(ATI_ARCH_X86)
|
|
//! Query the processor information about supported features and CPU type.
|
|
static void cpuid(int regs[4], int info);
|
|
//! Get value of extended control register
|
|
static uint64_t xgetbv(uint32_t which);
|
|
#endif // ATI_ARCH_X86
|
|
|
|
// Stack helper routines:
|
|
//
|
|
|
|
//! Return the current stack base and size information.
|
|
static void currentStackInfo(address* base, size_t* size);
|
|
|
|
//! Return the value of the current stack pointer.
|
|
static NOT_WIN64(inline) address currentStackPtr();
|
|
//! Set the value of the current stack pointer.
|
|
static WIN64_ONLY(inline) void WINDOWS_ONLY(__stdcall /*callee cleanup*/)
|
|
setCurrentStackPtr(address sp);
|
|
//! Touches all stack pages between [bottom,top[
|
|
static void touchStackPages(address bottom, address top);
|
|
|
|
// Thread routines:
|
|
//
|
|
|
|
//! Create a native thread and link it to the given OsThread.
|
|
static const void* createOsThread(Thread* osThread);
|
|
//! Set the thread's affinity to the given cpu ordinal.
|
|
static void setThreadAffinity(const void* handle, unsigned int cpu);
|
|
//! Set the thread's affinity to the given cpu mask.
|
|
static void setThreadAffinity(const void* handle, const ThreadAffinityMask& mask);
|
|
//! Set the currently running thread's name.
|
|
static void setCurrentThreadName(const char* name);
|
|
//! Check if the thread is alive
|
|
static bool isThreadAlive(const Thread& osThread);
|
|
|
|
//! Sleep for n milli-seconds.
|
|
static void sleep(long n);
|
|
//! Yield to threads of the same or lower priority
|
|
static void yield();
|
|
//! Execute a pause instruction (for spin loops).
|
|
static void spinPause();
|
|
|
|
// Memory routines:
|
|
//
|
|
|
|
//! Return the default os page size.
|
|
inline static size_t pageSize();
|
|
//! Return the amount of host total physical memory in bytes.
|
|
static uint64_t hostTotalPhysicalMemory();
|
|
|
|
//! Reserve a chunk of memory (priv | anon | noreserve).
|
|
static address reserveMemory(address start, size_t size, size_t alignment = 0,
|
|
MemProt prot = MEM_PROT_NONE);
|
|
//! Release a chunk of memory reserved with reserveMemory.
|
|
static bool releaseMemory(void* addr, size_t size);
|
|
//! Commit a chunk of memory previously reserved with reserveMemory.
|
|
static bool commitMemory(void* addr, size_t size, MemProt prot = MEM_PROT_NONE);
|
|
//! Uncommit a chunk of memory previously committed with commitMemory.
|
|
static bool uncommitMemory(void* addr, size_t size);
|
|
//! Set the page protections for the given memory region.
|
|
static bool protectMemory(void* addr, size_t size, MemProt prot);
|
|
|
|
//! Allocate an aligned chunk of memory.
|
|
static void* alignedMalloc(size_t size, size_t alignment);
|
|
//! Deallocate an aligned chunk of memory.
|
|
static void alignedFree(void* mem);
|
|
|
|
//! Platform-specific optimized memcpy()
|
|
static void* fastMemcpy(void* dest, const void* src, size_t n);
|
|
|
|
// File/Path helper routines:
|
|
//
|
|
|
|
//! Return the shared library extension string.
|
|
static const char* libraryExtension();
|
|
//! Return the shared library prefix string.
|
|
static const char* libraryPrefix();
|
|
//! Return the object extension string.
|
|
static const char* objectExtension();
|
|
//! Return the file separator char.
|
|
static char fileSeparator();
|
|
//! Return the path separator char.
|
|
static char pathSeparator();
|
|
//! Return whether the path exists
|
|
static bool pathExists(const std::string& path);
|
|
//! Create the path if it does not exist
|
|
static bool createPath(const std::string& path);
|
|
//! Remove the path if it is empty
|
|
static bool removePath(const std::string& path);
|
|
//! Printf re-implementation (due to MS CRT problem)
|
|
static int printf(const char* fmt, ...);
|
|
/*! \brief Invokes the command processor for the command execution
|
|
*
|
|
* \result Returns the operation result
|
|
*/
|
|
static int systemCall(const std::string& command); //!< command for execution
|
|
|
|
/*! \brief Retrieves a string containing the value
|
|
* of the environment variable
|
|
*
|
|
* \result Returns the environment variable value
|
|
*/
|
|
static std::string getEnvironment(const std::string& name); //!< the environment variable's name
|
|
|
|
/*! \brief Retrieves the path of the directory designated for temporary
|
|
* files
|
|
*
|
|
* \result Returns the temporary path
|
|
*/
|
|
static std::string getTempPath();
|
|
|
|
/*! \brief Creates a name for a temporary file
|
|
*
|
|
* \result Returns the name of temporary file
|
|
*/
|
|
static std::string getTempFileName();
|
|
|
|
//! Deletes file
|
|
static int unlink(const std::string& path);
|
|
|
|
// Library routines:
|
|
//
|
|
typedef bool (*SymbolCallback)(std::string, const void*, void*);
|
|
|
|
//! Load the shared library named by \a filename
|
|
static void* loadLibrary(const char* filename);
|
|
//! Unload the shared library.
|
|
static void unloadLibrary(void* handle);
|
|
//! Return the address of the function identified by \a name.
|
|
static void* getSymbol(void* handle, const char* name);
|
|
//! Get all the __kernel functions in the given shared library.
|
|
static bool iterateSymbols(void* handle, SymbolCallback func, void* data);
|
|
|
|
// Time routines:
|
|
//
|
|
|
|
//! Return the current system time counter in nanoseconds.
|
|
static uint64_t timeNanos();
|
|
//! Return the system timer's resolution in nanoseconds.
|
|
static uint64_t timerResolutionNanos();
|
|
//! Return the timeNanos starting point offset to Epoch.
|
|
static uint64_t offsetToEpochNanos();
|
|
|
|
// X86 Instructions helpers:
|
|
//
|
|
|
|
//! Skip an IDIV (F6/F7) instruction and return a pointer to the next insn.
|
|
static bool skipIDIV(address& insn);
|
|
|
|
// return gloabal memory size to be assigned to device info
|
|
static size_t getPhysicalMemSize();
|
|
|
|
//! get Application file name and path
|
|
static void getAppPathAndFileName(std::string& appName, std::string& appPathAndName);
|
|
|
|
//! Install SIGFPE handler for CPU device
|
|
static bool installSigfpeHandler();
|
|
|
|
//! Uninstall SIGFPE handler for CPU device
|
|
static void uninstallSigfpeHandler();
|
|
};
|
|
|
|
/*@}*/
|
|
|
|
inline size_t Os::pageSize() {
|
|
assert(pageSize_ != 0 && "runtime is not initialized");
|
|
return pageSize_;
|
|
}
|
|
|
|
inline int Os::processorCount() { return processorCount_; }
|
|
|
|
#if defined(_WIN64)
|
|
|
|
extern "C" void _Os_setCurrentStackPtr(address sp);
|
|
|
|
ALWAYSINLINE void Os::setCurrentStackPtr(address sp) { _Os_setCurrentStackPtr(sp); }
|
|
|
|
#else // !_WIN64
|
|
|
|
ALWAYSINLINE address Os::currentStackPtr() {
|
|
intptr_t value;
|
|
|
|
#if defined(__GNUC__)
|
|
__asm__ __volatile__(
|
|
#if defined(ATI_ARCH_X86)
|
|
LP64_SWITCH("movl %%esp", "movq %%rsp") ",%0"
|
|
: "=r"(value)
|
|
#elif defined(ATI_ARCH_ARM)
|
|
"mov %0,sp"
|
|
: "=r"(value)
|
|
#endif
|
|
);
|
|
#else // !__GNUC__
|
|
__asm mov value, esp;
|
|
#endif // !__GNUC__
|
|
|
|
return (address)value;
|
|
}
|
|
|
|
#endif // !_WIN64
|
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
inline void Os::ThreadAffinityMask::init() { CPU_ZERO(&mask_); }
|
|
|
|
inline void Os::ThreadAffinityMask::set(uint cpu) { CPU_SET(cpu, &mask_); }
|
|
|
|
inline void Os::ThreadAffinityMask::clear(uint cpu) { CPU_CLR(cpu, &mask_); }
|
|
|
|
inline bool Os::ThreadAffinityMask::isSet(uint cpu) const { return CPU_ISSET(cpu, &mask_); }
|
|
|
|
inline bool Os::ThreadAffinityMask::isEmpty() const {
|
|
const uint32_t* bits = (const uint32_t*)mask_.__bits;
|
|
for (uint i = 0; i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
if (bits[i] != 0) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::set(const cpu_set_t& mask) { mask_ = mask; }
|
|
|
|
inline void Os::ThreadAffinityMask::clear(const cpu_set_t& mask) {
|
|
const uint32_t* bitsClear = (const uint32_t*)mask.__bits;
|
|
uint32_t* bits = (uint32_t*)mask_.__bits;
|
|
for (uint i = 0; i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
bits[i] &= ~bitsClear[i];
|
|
}
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::adjust(cpu_set_t& mask) const {
|
|
uint32_t* bitsOut = (uint32_t*)mask.__bits;
|
|
const uint32_t* bits = (const uint32_t*)mask_.__bits;
|
|
for (uint i = 0; i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
bitsOut[i] &= bits[i];
|
|
}
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::countSet() const {
|
|
uint count = 0;
|
|
const uint32_t* bits = (const uint32_t*)mask_.__bits;
|
|
for (uint i = 0; i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
count += countBitsSet(bits[i]);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::getFirstSet() const {
|
|
const uint32_t* bits = (const uint32_t*)mask_.__bits;
|
|
for (uint i = 0; i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
if (bits[i] != 0) {
|
|
return leastBitSet(bits[i]) + (i * (8 * sizeof(uint32_t)));
|
|
}
|
|
}
|
|
return (uint)-1;
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::getNextSet(uint cpu) const {
|
|
const uint32_t* bits = (const uint32_t*)mask_.__bits;
|
|
++cpu;
|
|
uint j = cpu % (8 * sizeof(uint32_t));
|
|
for (uint i = cpu / (8 * sizeof(uint32_t)); i < sizeof(mask_.__bits) / sizeof(uint32_t); ++i) {
|
|
if (bits[i] != 0) {
|
|
for (; j < (8 * sizeof(uint32_t)); ++j) {
|
|
if (0 != (bits[i] & ((uint32_t)1 << j))) {
|
|
return i * (8 * sizeof(uint32_t)) + j;
|
|
}
|
|
}
|
|
}
|
|
j = 0;
|
|
}
|
|
return (uint)-1;
|
|
}
|
|
|
|
#else
|
|
|
|
inline void Os::ThreadAffinityMask::init() {
|
|
for (uint i = 0; i < sizeof(mask_) / sizeof(KAFFINITY); ++i) {
|
|
mask_[i] = (KAFFINITY)0;
|
|
}
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::set(uint cpu) {
|
|
mask_[cpu / (8 * sizeof(KAFFINITY))] |= (KAFFINITY)1 << (cpu % (8 * sizeof(KAFFINITY)));
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::clear(uint cpu) {
|
|
mask_[cpu / (8 * sizeof(KAFFINITY))] &= ~((KAFFINITY)1 << (cpu % (8 * sizeof(KAFFINITY))));
|
|
}
|
|
|
|
inline bool Os::ThreadAffinityMask::isSet(uint cpu) const {
|
|
return (KAFFINITY)0 !=
|
|
(mask_[cpu / (8 * sizeof(KAFFINITY))] & ((KAFFINITY)1 << (cpu % (8 * sizeof(KAFFINITY)))));
|
|
}
|
|
|
|
inline bool Os::ThreadAffinityMask::isEmpty() const {
|
|
for (uint i = 0; i < sizeof(mask_) / sizeof(KAFFINITY); ++i) {
|
|
if (mask_[i] != (KAFFINITY)0) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::set(size_t group, KAFFINITY affinity) {
|
|
mask_[group] |= affinity;
|
|
}
|
|
|
|
inline void Os::ThreadAffinityMask::adjust(size_t group, KAFFINITY& affinity) const {
|
|
affinity &= mask_[group];
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::countSet() const {
|
|
uint count = 0;
|
|
for (uint i = 0; i < sizeof(mask_) / sizeof(KAFFINITY); ++i) {
|
|
count += countBitsSet(mask_[i]);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::getFirstSet() const {
|
|
for (uint i = 0; i < sizeof(mask_) / sizeof(KAFFINITY); ++i) {
|
|
if (mask_[i] != 0) {
|
|
return leastBitSet(mask_[i]) + (i * (8 * sizeof(KAFFINITY)));
|
|
}
|
|
}
|
|
return (uint)-1;
|
|
}
|
|
|
|
inline uint Os::ThreadAffinityMask::getNextSet(uint cpu) const {
|
|
++cpu;
|
|
uint j = cpu % (8 * sizeof(KAFFINITY));
|
|
for (uint i = cpu / (8 * sizeof(KAFFINITY)); i < sizeof(mask_) / sizeof(KAFFINITY); ++i) {
|
|
if (mask_[i] != 0) {
|
|
for (; j < (8 * sizeof(KAFFINITY)); ++j) {
|
|
if (0 != (mask_[i] & ((KAFFINITY)1 << j))) {
|
|
return i * (8 * sizeof(KAFFINITY)) + j;
|
|
}
|
|
}
|
|
}
|
|
j = 0;
|
|
}
|
|
return (uint)-1;
|
|
}
|
|
|
|
#endif
|
|
|
|
} // namespace amd
|
|
|
|
#endif /*OS_HPP_*/
|