Merge branch 'develop' into amd/dev/gandryey/SWDEV-558849
This commit is contained in:
@@ -41,7 +41,7 @@ jobs:
|
|||||||
working-directory: projects/rocprofiler-compute
|
working-directory: projects/rocprofiler-compute
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install ruff
|
python -m pip install ruff==0.14.11
|
||||||
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
|
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
|
||||||
- name: Run Ruff Linter and Import Sorter
|
- name: Run Ruff Linter and Import Sorter
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -226,6 +226,12 @@ class AMDSMICommands():
|
|||||||
# Set args.* to passed in arguments
|
# Set args.* to passed in arguments
|
||||||
if gpu:
|
if gpu:
|
||||||
args.gpu = gpu
|
args.gpu = gpu
|
||||||
|
|
||||||
|
cpu_attributes = ["cpu"]
|
||||||
|
for attr in cpu_attributes:
|
||||||
|
if hasattr(args, 'cpu') and getattr(args, 'cpu'):
|
||||||
|
print("N/A")
|
||||||
|
return
|
||||||
|
|
||||||
# Handle No GPU passed
|
# Handle No GPU passed
|
||||||
if args.gpu == None:
|
if args.gpu == None:
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
|
|||||||
|
|
||||||
* New HIP APIs
|
* New HIP APIs
|
||||||
- `hipKernelGetParamInfo` returns the offset and size of a kernel parameter
|
- `hipKernelGetParamInfo` returns the offset and size of a kernel parameter
|
||||||
|
* Support for `barrier_arrive` and `barrier_wait` for `grid_group` and `thread_block`.
|
||||||
|
|
||||||
* New HIP supports
|
* New HIP supports
|
||||||
- `grid_group::block_rank()` returns the rank of the block in the calling thread
|
- `grid_group::block_rank()` returns the rank of the block in the calling thread
|
||||||
|
|||||||
@@ -111,9 +111,8 @@
|
|||||||
#if !defined(__HIPCC_RTC__)
|
#if !defined(__HIPCC_RTC__)
|
||||||
#include <hip/amd_detail/amd_hip_common.h>
|
#include <hip/amd_detail/amd_hip_common.h>
|
||||||
#include "amd_hip_vector_types.h" // float2 etc
|
#include "amd_hip_vector_types.h" // float2 etc
|
||||||
#include "device_library_decls.h" // ocml conversion functions
|
|
||||||
#include "math_fwd.h" // ocml device functions
|
|
||||||
#if defined(__clang__) && defined(__HIP__)
|
#if defined(__clang__) && defined(__HIP__)
|
||||||
|
#include "math_fwd.h" // ocml device functions
|
||||||
#include <hip/amd_detail/amd_warp_functions.h> // define warpSize
|
#include <hip/amd_detail/amd_warp_functions.h> // define warpSize
|
||||||
#include <hip/amd_detail/amd_warp_sync_functions.h> // Sync functions
|
#include <hip/amd_detail/amd_warp_sync_functions.h> // Sync functions
|
||||||
#endif
|
#endif
|
||||||
@@ -338,7 +337,11 @@ struct __attribute__((aligned(2))) __hip_bfloat16 {
|
|||||||
};
|
};
|
||||||
/**@}*/
|
/**@}*/
|
||||||
|
|
||||||
|
#if defined(__clang__)
|
||||||
typedef __bf16 __bf16_2 __attribute__((ext_vector_type(2)));
|
typedef __bf16 __bf16_2 __attribute__((ext_vector_type(2)));
|
||||||
|
#else
|
||||||
|
typedef __bf16 __bf16_2 __attribute__((vector_size(sizeof(__bf16) * 2)));
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \defgroup HIP_INTRINSIC_BFLOAT162_STRUCT
|
* \defgroup HIP_INTRINSIC_BFLOAT162_STRUCT
|
||||||
@@ -350,6 +353,7 @@ struct __attribute__((aligned(4))) __hip_bfloat162 {
|
|||||||
static_assert(sizeof(__hip_bfloat16[2]) == sizeof(__bf16_2));
|
static_assert(sizeof(__hip_bfloat16[2]) == sizeof(__bf16_2));
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
#if defined(__clang__)
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
__hip_bfloat16 x; /*! \brief raw representation of bfloat16 */
|
__hip_bfloat16 x; /*! \brief raw representation of bfloat16 */
|
||||||
@@ -357,7 +361,12 @@ struct __attribute__((aligned(4))) __hip_bfloat162 {
|
|||||||
};
|
};
|
||||||
__bf16_2 __xy_bf162;
|
__bf16_2 __xy_bf162;
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
/* GCC does not support anonymous structs with members that have non-trivial constructors (Clang
|
||||||
|
allows this as an extension). Expose x and y directly instead. */
|
||||||
|
__hip_bfloat16 x;
|
||||||
|
__hip_bfloat16 y;
|
||||||
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/*! \brief create __hip_bfloat162 from __hip_bfloat162_raw */
|
/*! \brief create __hip_bfloat162 from __hip_bfloat162_raw */
|
||||||
@@ -373,7 +382,11 @@ struct __attribute__((aligned(4))) __hip_bfloat162 {
|
|||||||
: x(a), y(b) {}
|
: x(a), y(b) {}
|
||||||
|
|
||||||
/*! \brief create __hip_bfloat162 from vector of __bf16_2 */
|
/*! \brief create __hip_bfloat162 from vector of __bf16_2 */
|
||||||
|
#if defined(__clang__)
|
||||||
__BF16_HOST_DEVICE__ __hip_bfloat162(const __bf16_2 in) : __xy_bf162(in) {}
|
__BF16_HOST_DEVICE__ __hip_bfloat162(const __bf16_2 in) : __xy_bf162(in) {}
|
||||||
|
#else
|
||||||
|
__BF16_HOST_DEVICE__ __hip_bfloat162(const __bf16_2 in) : x{in[0]}, y{in[1]} {}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*! \brief default constructor of __hip_bfloat162 */
|
/*! \brief default constructor of __hip_bfloat162 */
|
||||||
__BF16_HOST_DEVICE__ __hip_bfloat162() = default;
|
__BF16_HOST_DEVICE__ __hip_bfloat162() = default;
|
||||||
@@ -392,11 +405,22 @@ struct __attribute__((aligned(4))) __hip_bfloat162 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*! \brief return a vector of bf16 */
|
/*! \brief return a vector of bf16 */
|
||||||
__BF16_HOST_DEVICE__ operator __bf16_2() const { return __xy_bf162; }
|
__BF16_HOST_DEVICE__ operator __bf16_2() const {
|
||||||
|
#if defined(__clang__)
|
||||||
|
return __xy_bf162;
|
||||||
|
#else
|
||||||
|
return __bf16_2{x, y};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*! \brief return a vector of bf16 */
|
/*! \brief return a vector of bf16 */
|
||||||
__BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __bf16_2 in) {
|
__BF16_HOST_DEVICE__ __hip_bfloat162& operator=(const __bf16_2 in) {
|
||||||
|
#if defined(__clang__)
|
||||||
__xy_bf162 = in;
|
__xy_bf162 = in;
|
||||||
|
#else
|
||||||
|
x = __hip_bfloat16{in[0]};
|
||||||
|
y = __hip_bfloat16{in[1]};
|
||||||
|
#endif
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -835,6 +859,7 @@ __BF16_HOST_DEVICE_STATIC__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const
|
|||||||
return (__bf16)a / (__bf16)b;
|
return (__bf16)a / (__bf16)b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__clang__) && defined(__HIP__)
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
|
* \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
|
||||||
* \brief Performs FMA of given bfloat16 values
|
* \brief Performs FMA of given bfloat16 values
|
||||||
@@ -844,6 +869,7 @@ __BF16_DEVICE_STATIC__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip
|
|||||||
return __hip_bfloat16(__builtin_elementwise_fma(__bf16(a), __bf16(b), __bf16(c)));
|
return __hip_bfloat16(__builtin_elementwise_fma(__bf16(a), __bf16(b), __bf16(c)));
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
|
* \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
|
||||||
@@ -919,6 +945,8 @@ __BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 __hadd2_rn(const __hip_bfloat162 a,
|
|||||||
return __hip_bfloat162{__bf16_2(a) + __bf16_2(b)};
|
return __hip_bfloat162{__bf16_2(a) + __bf16_2(b)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__clang__) && defined(__HIP__)
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
|
* \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
|
||||||
* \brief Performs FMA of given bfloat162 values
|
* \brief Performs FMA of given bfloat162 values
|
||||||
@@ -927,6 +955,7 @@ __BF16_DEVICE_STATIC__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __
|
|||||||
const __hip_bfloat162 c) {
|
const __hip_bfloat162 c) {
|
||||||
return __hip_bfloat162{__builtin_elementwise_fma(__bf16_2(a), __bf16_2(b), __bf16_2(c))};
|
return __hip_bfloat162{__builtin_elementwise_fma(__bf16_2(a), __bf16_2(b), __bf16_2(c))};
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
|
* \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
|
||||||
@@ -1639,6 +1668,7 @@ __BF16_HOST_DEVICE_STATIC__ bool operator>=(const __hip_bfloat162& l, const __hi
|
|||||||
return fl.x >= fr.x && fl.x >= fr.y;
|
return fl.x >= fr.x && fl.x >= fr.y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__clang__) && defined(__HIP__)
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT16_MATH
|
* \ingroup HIP_INTRINSIC_BFLOAT16_MATH
|
||||||
* \brief Calculate ceil of bfloat16
|
* \brief Calculate ceil of bfloat16
|
||||||
@@ -1883,7 +1913,6 @@ __BF16_DEVICE_STATIC__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h) {
|
|||||||
return __hip_bfloat162(htrunc(h.x), htrunc(h.y));
|
return __hip_bfloat162(htrunc(h.x), htrunc(h.y));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__clang__) && defined(__HIP__)
|
|
||||||
/**
|
/**
|
||||||
* \ingroup HIP_INTRINSIC_BFLOAT162_MATH
|
* \ingroup HIP_INTRINSIC_BFLOAT162_MATH
|
||||||
* \brief Atomic add bfloat162
|
* \brief Atomic add bfloat162
|
||||||
|
|||||||
@@ -212,6 +212,19 @@ class grid_group : public thread_group {
|
|||||||
//! @copydoc thread_group::sync
|
//! @copydoc thread_group::sync
|
||||||
__CG_QUALIFIER__ void sync() const { internal::grid::sync(); }
|
__CG_QUALIFIER__ void sync() const { internal::grid::sync(); }
|
||||||
__CG_QUALIFIER__ dim3 group_dim() const { return internal::grid::grid_dim(); }
|
__CG_QUALIFIER__ dim3 group_dim() const { return internal::grid::grid_dim(); }
|
||||||
|
struct arrival_token {
|
||||||
|
unsigned int signal;
|
||||||
|
};
|
||||||
|
//! Arrive at a barrier
|
||||||
|
__CG_QUALIFIER__ arrival_token barrier_arrive() const {
|
||||||
|
arrival_token t;
|
||||||
|
t.signal = internal::grid::barrier_signal();
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
//! Arrive at a barrier
|
||||||
|
__CG_QUALIFIER__ void barrier_wait(arrival_token&& t) const {
|
||||||
|
internal::grid::barrier_wait(t.signal);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \ingroup CooperativeGConstruct
|
/** \ingroup CooperativeGConstruct
|
||||||
@@ -295,6 +308,14 @@ class thread_block : public thread_group {
|
|||||||
__CG_STATIC_QUALIFIER__ void sync() { internal::workgroup::sync(); }
|
__CG_STATIC_QUALIFIER__ void sync() { internal::workgroup::sync(); }
|
||||||
//! Returns the group dimensions.
|
//! Returns the group dimensions.
|
||||||
__CG_QUALIFIER__ dim3 group_dim() { return internal::workgroup::block_dim(); }
|
__CG_QUALIFIER__ dim3 group_dim() { return internal::workgroup::block_dim(); }
|
||||||
|
struct arrival_token {};
|
||||||
|
//! Arrive at a barrier
|
||||||
|
__CG_QUALIFIER__ arrival_token barrier_arrive() const {
|
||||||
|
internal::workgroup::barrier_arrive();
|
||||||
|
return arrival_token{};
|
||||||
|
}
|
||||||
|
//! Arrive at a barrier
|
||||||
|
__CG_QUALIFIER__ void barrier_wait(arrival_token&&) const { internal::workgroup::barrier_wait(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \ingroup CooperativeGConstruct
|
/** \ingroup CooperativeGConstruct
|
||||||
|
|||||||
@@ -97,6 +97,8 @@ extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void);
|
|||||||
extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void);
|
extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void);
|
||||||
extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void);
|
extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void);
|
||||||
extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void);
|
extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void);
|
||||||
|
extern "C" __device__ __attribute__((const)) uint __ockl_grid_bar_arrive(void);
|
||||||
|
extern "C" __device__ __attribute__((convergent)) void __ockl_grid_bar_wait(uint);
|
||||||
|
|
||||||
extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float);
|
extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float);
|
||||||
|
|
||||||
|
|||||||
@@ -63,7 +63,7 @@
|
|||||||
#define HIP_API_TABLE_STEP_VERSION 0
|
#define HIP_API_TABLE_STEP_VERSION 0
|
||||||
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
|
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
|
||||||
#define HIP_TOOLS_API_TABLE_STEP_VERSION 0
|
#define HIP_TOOLS_API_TABLE_STEP_VERSION 0
|
||||||
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 20
|
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 21
|
||||||
|
|
||||||
// HIP API interface
|
// HIP API interface
|
||||||
// HIP compiler dispatch functions
|
// HIP compiler dispatch functions
|
||||||
@@ -1113,6 +1113,9 @@ typedef hipError_t (*t_hipKernelGetLibrary)(hipLibrary_t* library, hipKernel_t k
|
|||||||
typedef hipError_t (*t_hipKernelGetName)(const char** name, hipKernel_t kernel);
|
typedef hipError_t (*t_hipKernelGetName)(const char** name, hipKernel_t kernel);
|
||||||
typedef hipError_t (*t_hipGetProcAddress_spt)(const char* symbol, void** pfn, int hipVersion, uint64_t flags,
|
typedef hipError_t (*t_hipGetProcAddress_spt)(const char* symbol, void** pfn, int hipVersion, uint64_t flags,
|
||||||
hipDriverProcAddressQueryResult* symbolStatus);
|
hipDriverProcAddressQueryResult* symbolStatus);
|
||||||
|
typedef hipError_t (*t_hipExtDisableLogging)();
|
||||||
|
typedef hipError_t (*t_hipExtEnableLogging)();
|
||||||
|
typedef hipError_t (*t_hipExtSetLoggingParams)(size_t log_level, size_t log_size, size_t log_mask);
|
||||||
|
|
||||||
typedef hipError_t (*t_hipKernelGetParamInfo)(hipKernel_t kernel, size_t paramIndex,
|
typedef hipError_t (*t_hipKernelGetParamInfo)(hipKernel_t kernel, size_t paramIndex,
|
||||||
size_t* paramOffset, size_t* paramSize);
|
size_t* paramOffset, size_t* paramSize);
|
||||||
@@ -1707,8 +1710,13 @@ struct HipDispatchTable {
|
|||||||
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
||||||
t_hipKernelGetParamInfo hipKernelGetParamInfo_fn;
|
t_hipKernelGetParamInfo hipKernelGetParamInfo_fn;
|
||||||
|
|
||||||
// DO NOT EDIT ABOVE!
|
|
||||||
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 21
|
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 21
|
||||||
|
t_hipExtDisableLogging hipExtDisableLogging_fn;
|
||||||
|
t_hipExtEnableLogging hipExtEnableLogging_fn;
|
||||||
|
t_hipExtSetLoggingParams hipExtSetLoggingParams_fn;
|
||||||
|
|
||||||
|
// DO NOT EDIT ABOVE!
|
||||||
|
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 22
|
||||||
|
|
||||||
// ******************************************************************************************* //
|
// ******************************************************************************************* //
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -196,6 +196,9 @@ __CG_STATIC_QUALIFIER__ dim3 grid_dim() {
|
|||||||
static_cast<__hip_uint32_t>(gridDim.z)));
|
static_cast<__hip_uint32_t>(gridDim.z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__CG_STATIC_QUALIFIER__ unsigned int barrier_signal() { return __ockl_grid_bar_arrive(); }
|
||||||
|
|
||||||
|
__CG_STATIC_QUALIFIER__ void barrier_wait(unsigned int s) { __ockl_grid_bar_wait(s); }
|
||||||
} // namespace grid
|
} // namespace grid
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -238,6 +241,23 @@ __CG_STATIC_QUALIFIER__ dim3 block_dim() {
|
|||||||
static_cast<__hip_uint32_t>(blockDim.z)));
|
static_cast<__hip_uint32_t>(blockDim.z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__CG_STATIC_QUALIFIER__ void barrier_arrive() {
|
||||||
|
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
|
||||||
|
#if __has_builtin(__builtin_amdgcn_s_barrier_signal) && \
|
||||||
|
__has_builtin(__builtin_amdgcn_s_barrier_wait)
|
||||||
|
__builtin_amdgcn_s_barrier_signal(-1);
|
||||||
|
#endif // __builtin_amdgcn_s_barrier_signal && __builtin_amdgcn_s_barrier_wait
|
||||||
|
}
|
||||||
|
|
||||||
|
__CG_STATIC_QUALIFIER__ void barrier_wait() {
|
||||||
|
#if __has_builtin(__builtin_amdgcn_s_barrier_signal) && \
|
||||||
|
__has_builtin(__builtin_amdgcn_s_barrier_wait)
|
||||||
|
__builtin_amdgcn_s_barrier_wait(-1);
|
||||||
|
#else
|
||||||
|
__builtin_amdgcn_s_barrier();
|
||||||
|
#endif // __builtin_amdgcn_s_barrier_signal && __builtin_amdgcn_s_barrier_wait
|
||||||
|
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
|
||||||
|
}
|
||||||
} // namespace workgroup
|
} // namespace workgroup
|
||||||
|
|
||||||
namespace tiled_group {
|
namespace tiled_group {
|
||||||
|
|||||||
@@ -468,7 +468,10 @@ enum hip_api_id_t {
|
|||||||
HIP_API_ID_hipKernelGetName = 448,
|
HIP_API_ID_hipKernelGetName = 448,
|
||||||
HIP_API_ID_hipOccupancyAvailableDynamicSMemPerBlock = 449,
|
HIP_API_ID_hipOccupancyAvailableDynamicSMemPerBlock = 449,
|
||||||
HIP_API_ID_hipKernelGetParamInfo = 450,
|
HIP_API_ID_hipKernelGetParamInfo = 450,
|
||||||
HIP_API_ID_LAST = 450,
|
HIP_API_ID_hipExtDisableLogging = 451,
|
||||||
|
HIP_API_ID_hipExtEnableLogging = 452,
|
||||||
|
HIP_API_ID_hipExtSetLoggingParams = 453,
|
||||||
|
HIP_API_ID_LAST = 453,
|
||||||
|
|
||||||
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
|
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
|
||||||
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
|
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
|
||||||
@@ -590,12 +593,15 @@ static inline const char* hip_api_name(const uint32_t id) {
|
|||||||
case HIP_API_ID_hipEventRecord: return "hipEventRecord";
|
case HIP_API_ID_hipEventRecord: return "hipEventRecord";
|
||||||
case HIP_API_ID_hipEventRecordWithFlags: return "hipEventRecordWithFlags";
|
case HIP_API_ID_hipEventRecordWithFlags: return "hipEventRecordWithFlags";
|
||||||
case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
|
case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
|
||||||
|
case HIP_API_ID_hipExtDisableLogging: return "hipExtDisableLogging";
|
||||||
|
case HIP_API_ID_hipExtEnableLogging: return "hipExtEnableLogging";
|
||||||
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
|
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
|
||||||
case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
|
case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
|
||||||
case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
|
case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
|
||||||
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
|
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
|
||||||
case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags";
|
case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags";
|
||||||
case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel";
|
case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel";
|
||||||
|
case HIP_API_ID_hipExtSetLoggingParams: return "hipExtSetLoggingParams";
|
||||||
case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask";
|
case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask";
|
||||||
case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask";
|
case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask";
|
||||||
case HIP_API_ID_hipExternalMemoryGetMappedBuffer: return "hipExternalMemoryGetMappedBuffer";
|
case HIP_API_ID_hipExternalMemoryGetMappedBuffer: return "hipExternalMemoryGetMappedBuffer";
|
||||||
@@ -1034,12 +1040,15 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
|||||||
if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord;
|
if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord;
|
||||||
if (strcmp("hipEventRecordWithFlags", name) == 0) return HIP_API_ID_hipEventRecordWithFlags;
|
if (strcmp("hipEventRecordWithFlags", name) == 0) return HIP_API_ID_hipEventRecordWithFlags;
|
||||||
if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize;
|
if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize;
|
||||||
|
if (strcmp("hipExtDisableLogging", name) == 0) return HIP_API_ID_hipExtDisableLogging;
|
||||||
|
if (strcmp("hipExtEnableLogging", name) == 0) return HIP_API_ID_hipExtEnableLogging;
|
||||||
if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError;
|
if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError;
|
||||||
if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount;
|
if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount;
|
||||||
if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel;
|
if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel;
|
||||||
if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice;
|
if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice;
|
||||||
if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags;
|
if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags;
|
||||||
if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel;
|
if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel;
|
||||||
|
if (strcmp("hipExtSetLoggingParams", name) == 0) return HIP_API_ID_hipExtSetLoggingParams;
|
||||||
if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask;
|
if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask;
|
||||||
if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask;
|
if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask;
|
||||||
if (strcmp("hipExternalMemoryGetMappedBuffer", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedBuffer;
|
if (strcmp("hipExternalMemoryGetMappedBuffer", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedBuffer;
|
||||||
@@ -1851,6 +1860,11 @@ typedef struct hip_api_data_s {
|
|||||||
hipEvent_t stopEvent;
|
hipEvent_t stopEvent;
|
||||||
unsigned int flags;
|
unsigned int flags;
|
||||||
} hipExtModuleLaunchKernel;
|
} hipExtModuleLaunchKernel;
|
||||||
|
struct {
|
||||||
|
size_t log_level;
|
||||||
|
size_t log_size;
|
||||||
|
size_t log_mask;
|
||||||
|
} hipExtSetLoggingParams;
|
||||||
struct {
|
struct {
|
||||||
hipStream_t* stream;
|
hipStream_t* stream;
|
||||||
hipStream_t stream__val;
|
hipStream_t stream__val;
|
||||||
@@ -4484,6 +4498,12 @@ typedef struct hip_api_data_s {
|
|||||||
#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \
|
#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \
|
||||||
cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \
|
cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \
|
||||||
};
|
};
|
||||||
|
// hipExtDisableLogging[]
|
||||||
|
#define INIT_hipExtDisableLogging_CB_ARGS_DATA(cb_data) { \
|
||||||
|
};
|
||||||
|
// hipExtEnableLogging[]
|
||||||
|
#define INIT_hipExtEnableLogging_CB_ARGS_DATA(cb_data) { \
|
||||||
|
};
|
||||||
// hipExtGetLastError[]
|
// hipExtGetLastError[]
|
||||||
#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \
|
#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \
|
||||||
};
|
};
|
||||||
@@ -4535,6 +4555,12 @@ typedef struct hip_api_data_s {
|
|||||||
cb_data.args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
|
cb_data.args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \
|
||||||
cb_data.args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \
|
cb_data.args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \
|
||||||
};
|
};
|
||||||
|
// hipExtSetLoggingParams[('size_t', 'log_level'), ('size_t', 'log_size'), ('size_t', 'log_mask')]
|
||||||
|
#define INIT_hipExtSetLoggingParams_CB_ARGS_DATA(cb_data) { \
|
||||||
|
cb_data.args.hipExtSetLoggingParams.log_level = (size_t)log_level; \
|
||||||
|
cb_data.args.hipExtSetLoggingParams.log_size = (size_t)log_size; \
|
||||||
|
cb_data.args.hipExtSetLoggingParams.log_mask = (size_t)log_mask; \
|
||||||
|
};
|
||||||
// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
|
// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
|
||||||
#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \
|
#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \
|
||||||
cb_data.args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \
|
cb_data.args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \
|
||||||
@@ -7125,6 +7151,12 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
|||||||
// hipEventSynchronize[('hipEvent_t', 'event')]
|
// hipEventSynchronize[('hipEvent_t', 'event')]
|
||||||
case HIP_API_ID_hipEventSynchronize:
|
case HIP_API_ID_hipEventSynchronize:
|
||||||
break;
|
break;
|
||||||
|
// hipExtDisableLogging[]
|
||||||
|
case HIP_API_ID_hipExtDisableLogging:
|
||||||
|
break;
|
||||||
|
// hipExtEnableLogging[]
|
||||||
|
case HIP_API_ID_hipExtEnableLogging:
|
||||||
|
break;
|
||||||
// hipExtGetLastError[]
|
// hipExtGetLastError[]
|
||||||
case HIP_API_ID_hipExtGetLastError:
|
case HIP_API_ID_hipExtGetLastError:
|
||||||
break;
|
break;
|
||||||
@@ -7150,6 +7182,9 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
|||||||
if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams);
|
if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams);
|
||||||
if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra);
|
if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra);
|
||||||
break;
|
break;
|
||||||
|
// hipExtSetLoggingParams[('size_t', 'log_level'), ('size_t', 'log_size'), ('size_t', 'log_mask')]
|
||||||
|
case HIP_API_ID_hipExtSetLoggingParams:
|
||||||
|
break;
|
||||||
// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
|
// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')]
|
||||||
case HIP_API_ID_hipExtStreamCreateWithCUMask:
|
case HIP_API_ID_hipExtStreamCreateWithCUMask:
|
||||||
if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream);
|
if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream);
|
||||||
@@ -9124,6 +9159,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
|||||||
oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventSynchronize.event);
|
oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventSynchronize.event);
|
||||||
oss << ")";
|
oss << ")";
|
||||||
break;
|
break;
|
||||||
|
case HIP_API_ID_hipExtDisableLogging:
|
||||||
|
oss << "hipExtDisableLogging(";
|
||||||
|
oss << ")";
|
||||||
|
break;
|
||||||
|
case HIP_API_ID_hipExtEnableLogging:
|
||||||
|
oss << "hipExtEnableLogging(";
|
||||||
|
oss << ")";
|
||||||
|
break;
|
||||||
case HIP_API_ID_hipExtGetLastError:
|
case HIP_API_ID_hipExtGetLastError:
|
||||||
oss << "hipExtGetLastError(";
|
oss << "hipExtGetLastError(";
|
||||||
oss << ")";
|
oss << ")";
|
||||||
@@ -9188,6 +9231,13 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
|||||||
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.flags);
|
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.flags);
|
||||||
oss << ")";
|
oss << ")";
|
||||||
break;
|
break;
|
||||||
|
case HIP_API_ID_hipExtSetLoggingParams:
|
||||||
|
oss << "hipExtSetLoggingParams(";
|
||||||
|
oss << "log_level="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtSetLoggingParams.log_level);
|
||||||
|
oss << ", log_size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtSetLoggingParams.log_size);
|
||||||
|
oss << ", log_mask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtSetLoggingParams.log_mask);
|
||||||
|
oss << ")";
|
||||||
|
break;
|
||||||
case HIP_API_ID_hipExtStreamCreateWithCUMask:
|
case HIP_API_ID_hipExtStreamCreateWithCUMask:
|
||||||
oss << "hipExtStreamCreateWithCUMask(";
|
oss << "hipExtStreamCreateWithCUMask(";
|
||||||
if (data->args.hipExtStreamCreateWithCUMask.stream == NULL) oss << "stream=NULL";
|
if (data->args.hipExtStreamCreateWithCUMask.stream == NULL) oss << "stream=NULL";
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ target_sources(amdhip64 PRIVATE
|
|||||||
hip_graph.cpp
|
hip_graph.cpp
|
||||||
hip_hmm.cpp
|
hip_hmm.cpp
|
||||||
hip_intercept.cpp
|
hip_intercept.cpp
|
||||||
|
hip_log.cpp
|
||||||
hip_memory.cpp
|
hip_memory.cpp
|
||||||
hip_mempool.cpp
|
hip_mempool.cpp
|
||||||
hip_mempool_impl.cpp
|
hip_mempool_impl.cpp
|
||||||
|
|||||||
@@ -523,3 +523,6 @@ hipKernelGetName
|
|||||||
hipOccupancyAvailableDynamicSMemPerBlock
|
hipOccupancyAvailableDynamicSMemPerBlock
|
||||||
hipGetProcAddress_spt
|
hipGetProcAddress_spt
|
||||||
hipKernelGetParamInfo
|
hipKernelGetParamInfo
|
||||||
|
hipExtDisableLogging
|
||||||
|
hipExtEnableLogging
|
||||||
|
hipExtSetLoggingParams
|
||||||
|
|||||||
@@ -885,6 +885,9 @@ hipError_t hipOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, con
|
|||||||
int numBlocks, int blockSize);
|
int numBlocks, int blockSize);
|
||||||
hipError_t hipKernelGetParamInfo(hipKernel_t kernel, size_t paramIndex, size_t* paramOffset,
|
hipError_t hipKernelGetParamInfo(hipKernel_t kernel, size_t paramIndex, size_t* paramOffset,
|
||||||
size_t* paramSize);
|
size_t* paramSize);
|
||||||
|
hipError_t hipExtDisableLogging();
|
||||||
|
hipError_t hipExtEnableLogging();
|
||||||
|
hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask);
|
||||||
} // namespace hip
|
} // namespace hip
|
||||||
|
|
||||||
namespace hip {
|
namespace hip {
|
||||||
@@ -1432,6 +1435,9 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
|
|||||||
ptrDispatchTable->hipKernelGetName_fn = hip::hipKernelGetName;
|
ptrDispatchTable->hipKernelGetName_fn = hip::hipKernelGetName;
|
||||||
ptrDispatchTable->hipOccupancyAvailableDynamicSMemPerBlock_fn = hip::hipOccupancyAvailableDynamicSMemPerBlock;
|
ptrDispatchTable->hipOccupancyAvailableDynamicSMemPerBlock_fn = hip::hipOccupancyAvailableDynamicSMemPerBlock;
|
||||||
ptrDispatchTable->hipKernelGetParamInfo_fn = hip::hipKernelGetParamInfo;
|
ptrDispatchTable->hipKernelGetParamInfo_fn = hip::hipKernelGetParamInfo;
|
||||||
|
ptrDispatchTable->hipExtDisableLogging_fn = hip::hipExtDisableLogging;
|
||||||
|
ptrDispatchTable->hipExtEnableLogging_fn = hip::hipExtEnableLogging;
|
||||||
|
ptrDispatchTable->hipExtSetLoggingParams_fn = hip::hipExtSetLoggingParams;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HIP_ROCPROFILER_REGISTER > 0
|
#if HIP_ROCPROFILER_REGISTER > 0
|
||||||
@@ -2114,15 +2120,19 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipOccupancyAvailableDynamicSMemPerBlock_fn, 5
|
|||||||
HIP_ENFORCE_ABI(HipDispatchTable, hipGetProcAddress_spt_fn, 506);
|
HIP_ENFORCE_ABI(HipDispatchTable, hipGetProcAddress_spt_fn, 506);
|
||||||
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
||||||
HIP_ENFORCE_ABI(HipDispatchTable, hipKernelGetParamInfo_fn, 507);
|
HIP_ENFORCE_ABI(HipDispatchTable, hipKernelGetParamInfo_fn, 507);
|
||||||
|
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 21
|
||||||
|
HIP_ENFORCE_ABI(HipDispatchTable, hipExtDisableLogging_fn, 508);
|
||||||
|
HIP_ENFORCE_ABI(HipDispatchTable, hipExtEnableLogging_fn, 509);
|
||||||
|
HIP_ENFORCE_ABI(HipDispatchTable, hipExtSetLoggingParams_fn, 510);
|
||||||
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
|
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
|
||||||
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
|
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
|
||||||
//
|
//
|
||||||
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
|
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
|
||||||
//
|
//
|
||||||
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
|
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
|
||||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 508)
|
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 511)
|
||||||
|
|
||||||
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 20,
|
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 21,
|
||||||
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
|
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
|
||||||
"pointers and then update this check so it is true");
|
"pointers and then update this check so it is true");
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -645,6 +645,9 @@ global:
|
|||||||
hipOccupancyAvailableDynamicSMemPerBlock;
|
hipOccupancyAvailableDynamicSMemPerBlock;
|
||||||
hipGetProcAddress_spt;
|
hipGetProcAddress_spt;
|
||||||
hipKernelGetParamInfo;
|
hipKernelGetParamInfo;
|
||||||
|
hipExtDisableLogging;
|
||||||
|
hipExtEnableLogging;
|
||||||
|
hipExtSetLoggingParams;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
} hip_7.1;
|
} hip_7.1;
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#include "hip_internal.hpp"
|
||||||
|
#include "hip_platform.hpp"
|
||||||
|
|
||||||
|
namespace hip {
|
||||||
|
|
||||||
|
hipError_t hipExtEnableLogging() {
|
||||||
|
HIP_INIT_API(hipExtEnableLogging);
|
||||||
|
amd::ScopedLock lock(PlatformState::instance().getLogLock());
|
||||||
|
AMD_LOG_LEVEL = PlatformState::instance().log_level_;
|
||||||
|
AMD_LOG_MASK = PlatformState::instance().log_mask_;
|
||||||
|
HIP_RETURN(hipSuccess);
|
||||||
|
}
|
||||||
|
|
||||||
|
hipError_t hipExtDisableLogging() {
|
||||||
|
HIP_INIT_API(hipExtDisableLogging);
|
||||||
|
amd::ScopedLock lock(PlatformState::instance().getLogLock());
|
||||||
|
AMD_LOG_LEVEL = 0;
|
||||||
|
HIP_RETURN(hipSuccess);
|
||||||
|
}
|
||||||
|
|
||||||
|
hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask) {
|
||||||
|
HIP_INIT_API(hipExtSetLoggingParams, log_level, log_size, log_mask);
|
||||||
|
amd::ScopedLock lock(PlatformState::instance().getLogLock());
|
||||||
|
// Store logging parameters for later activation
|
||||||
|
PlatformState::instance().log_level_ = log_level;
|
||||||
|
PlatformState::instance().log_size_ = log_size;
|
||||||
|
PlatformState::instance().log_mask_ = log_mask;
|
||||||
|
HIP_RETURN(hipSuccess);
|
||||||
|
}
|
||||||
|
} // namespace::hip
|
||||||
@@ -50,9 +50,12 @@ class PlatformState {
|
|||||||
// Unique FD Store Lock
|
// Unique FD Store Lock
|
||||||
amd::Monitor ufd_lock_{true};
|
amd::Monitor ufd_lock_{true};
|
||||||
|
|
||||||
|
// Lock for logging operations
|
||||||
|
amd::Monitor lg_lock_{true};
|
||||||
|
|
||||||
// Singleton object
|
// Singleton object
|
||||||
static PlatformState* platform_;
|
static PlatformState* platform_;
|
||||||
PlatformState() {}
|
PlatformState() : log_level_(0), log_size_(0), log_mask_(0) {}
|
||||||
~PlatformState() {}
|
~PlatformState() {}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -113,6 +116,14 @@ class PlatformState {
|
|||||||
|
|
||||||
size_t UfdMapSize() const { return ufd_map_.size(); }
|
size_t UfdMapSize() const { return ufd_map_.size(); }
|
||||||
|
|
||||||
|
// Logging lock accessor
|
||||||
|
amd::Monitor& getLogLock() { return lg_lock_; }
|
||||||
|
|
||||||
|
// Friend functions for logging access
|
||||||
|
friend hipError_t hipExtEnableLogging();
|
||||||
|
friend hipError_t hipExtDisableLogging();
|
||||||
|
friend hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask);
|
||||||
|
|
||||||
inline bool RegisterLibraryFunction(const hipKernel_t f, const hipLibrary_t l) {
|
inline bool RegisterLibraryFunction(const hipKernel_t f, const hipLibrary_t l) {
|
||||||
amd::ScopedLock lock(lock_);
|
amd::ScopedLock lock(lock_);
|
||||||
if (library_functions_.find(f) == library_functions_.end()) {
|
if (library_functions_.find(f) == library_functions_.end()) {
|
||||||
@@ -150,5 +161,10 @@ class PlatformState {
|
|||||||
|
|
||||||
void* dynamicLibraryHandle_{nullptr};
|
void* dynamicLibraryHandle_{nullptr};
|
||||||
std::unordered_map<hipKernel_t, hipLibrary_t> library_functions_;
|
std::unordered_map<hipKernel_t, hipLibrary_t> library_functions_;
|
||||||
|
|
||||||
|
// Logging state (moved from LoggingInfo singleton)
|
||||||
|
size_t log_level_;
|
||||||
|
size_t log_size_;
|
||||||
|
size_t log_mask_;
|
||||||
};
|
};
|
||||||
} // namespace hip
|
} // namespace hip
|
||||||
|
|||||||
@@ -2067,4 +2067,13 @@ hipError_t hipKernelGetParamInfo(hipKernel_t kernel, size_t paramIndex, size_t*
|
|||||||
size_t* paramSize) {
|
size_t* paramSize) {
|
||||||
return hip::GetHipDispatchTable()->hipKernelGetParamInfo_fn(kernel, paramIndex, paramOffset,
|
return hip::GetHipDispatchTable()->hipKernelGetParamInfo_fn(kernel, paramIndex, paramOffset,
|
||||||
paramSize);
|
paramSize);
|
||||||
|
}
|
||||||
|
hipError_t hipExtEnableLogging() {
|
||||||
|
return hip::GetHipDispatchTable()->hipExtEnableLogging_fn();
|
||||||
|
}
|
||||||
|
hipError_t hipExtDisableLogging() {
|
||||||
|
return hip::GetHipDispatchTable()->hipExtDisableLogging_fn();
|
||||||
|
}
|
||||||
|
hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask) {
|
||||||
|
return hip::GetHipDispatchTable()->hipExtSetLoggingParams_fn(log_level, log_size, log_mask);
|
||||||
}
|
}
|
||||||
@@ -138,14 +138,6 @@
|
|||||||
"=== TODO ===",
|
"=== TODO ===",
|
||||||
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
|
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
|
||||||
"=== TODO === fail on 100% test data",
|
"=== TODO === fail on 100% test data",
|
||||||
"Unit_Device_hexp10_Accuracy_Positive",
|
|
||||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
|
||||||
"Unit_Device_hexp2_Accuracy_Positive",
|
|
||||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
|
||||||
"Unit_Device_hlog_Accuracy_Positive",
|
|
||||||
"Unit_Device_h2log_Accuracy_Positive",
|
|
||||||
"Unit_Device_hlog10_Accuracy_Positive",
|
|
||||||
"Unit_Device_h2log10_Accuracy_Positive",
|
|
||||||
"Unit_Device___hfma2_Accuracy_Positive",
|
"Unit_Device___hfma2_Accuracy_Positive",
|
||||||
#endif
|
#endif
|
||||||
#if defined gfx90a || defined gfx942 || defined gfx950
|
#if defined gfx90a || defined gfx942 || defined gfx950
|
||||||
|
|||||||
@@ -752,6 +752,8 @@
|
|||||||
#endif
|
#endif
|
||||||
"=== Following tests disabled as it should be a local perf test",
|
"=== Following tests disabled as it should be a local perf test",
|
||||||
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
|
"Performance_hipExtLaunchKernelGGL_QueryGPUFrequency",
|
||||||
|
"Unit_hipDynamicLogging_Positive_Basic",
|
||||||
|
"Unit_hipDynamicLogging_Positive_MultipleEnableDisable",
|
||||||
"End of json"
|
"End of json"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ set(TEST_SRC
|
|||||||
binary_partition.cc
|
binary_partition.cc
|
||||||
cg_ballot.cc
|
cg_ballot.cc
|
||||||
cg_any_all.cc
|
cg_any_all.cc
|
||||||
|
split_barrier.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
if(HIP_PLATFORM STREQUAL "nvidia")
|
if(HIP_PLATFORM STREQUAL "nvidia")
|
||||||
|
|||||||
@@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <hip/hip_cooperative_groups.h>
|
||||||
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
static __global__ void wg_split_barrier(float *out, float *in) {
|
||||||
|
namespace cg = cooperative_groups;
|
||||||
|
|
||||||
|
__shared__ float mid[32];
|
||||||
|
size_t i = threadIdx.x;
|
||||||
|
auto tb = cg::this_thread_block();
|
||||||
|
|
||||||
|
out[i] = in[i] * 2.0f;
|
||||||
|
|
||||||
|
auto tok = tb.barrier_arrive();
|
||||||
|
|
||||||
|
// use tid 0 to populate shared mem
|
||||||
|
if (i == 0) {
|
||||||
|
for (size_t j = 0; j < 32; j++) {
|
||||||
|
mid[j] = in[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tb.barrier_wait(std::move(tok));
|
||||||
|
|
||||||
|
out[i] += mid[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("Unit_coop_thread_block_split_barrier") {
|
||||||
|
constexpr size_t size = 32;
|
||||||
|
float *d_out, *d_in;
|
||||||
|
|
||||||
|
HIP_CHECK(hipMalloc(&d_out, sizeof(float) * size));
|
||||||
|
HIP_CHECK(hipMalloc(&d_in, sizeof(float) * size));
|
||||||
|
|
||||||
|
std::vector<float> in(size, 0.0f), out = in;
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
in[i] = i + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
HIP_CHECK(hipMemset(d_out, 0, sizeof(float) * size));
|
||||||
|
HIP_CHECK(
|
||||||
|
hipMemcpy(d_in, in.data(), sizeof(float) * size, hipMemcpyHostToDevice));
|
||||||
|
wg_split_barrier<<<1, size>>>(d_out, d_in);
|
||||||
|
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size,
|
||||||
|
hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
HIP_CHECK(hipFree(d_out));
|
||||||
|
HIP_CHECK(hipFree(d_in));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
INFO("Index: " << i << " in: " << in[i] << " out: " << out[i]);
|
||||||
|
REQUIRE((in[i] * 3.0f) == Catch::Approx(out[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static __global__ void grid_split_barrier(int *data, int *result, int N) {
|
||||||
|
namespace cg = cooperative_groups;
|
||||||
|
cg::grid_group grid = cg::this_grid();
|
||||||
|
|
||||||
|
int gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
auto tok = grid.barrier_arrive();
|
||||||
|
if (gid < N) {
|
||||||
|
data[gid] = gid + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
grid.barrier_wait(std::move(tok));
|
||||||
|
|
||||||
|
if (grid.thread_rank() == 0) {
|
||||||
|
int sum = 0;
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
sum += data[i];
|
||||||
|
*result = sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("Unit_coop_grids_split_barrier") {
|
||||||
|
hipDeviceProp_t prop;
|
||||||
|
HIP_CHECK(hipGetDeviceProperties(&prop, 0));
|
||||||
|
|
||||||
|
if (prop.cooperativeLaunch != 0) {
|
||||||
|
int N = 1024;
|
||||||
|
const int threads = 128;
|
||||||
|
const int blocks = (N + threads - 1) / threads;
|
||||||
|
|
||||||
|
int *d_in, *d_out;
|
||||||
|
HIP_CHECK(hipMalloc(&d_in, N * sizeof(int)));
|
||||||
|
HIP_CHECK(hipMalloc(&d_out, sizeof(int)));
|
||||||
|
|
||||||
|
void *args[] = {&d_in, &d_out, &N};
|
||||||
|
|
||||||
|
dim3 grid(blocks);
|
||||||
|
dim3 block(threads);
|
||||||
|
|
||||||
|
HIP_CHECK(hipLaunchCooperativeKernel((void *)grid_split_barrier, grid,
|
||||||
|
block, args, 0, 0));
|
||||||
|
HIP_CHECK(hipDeviceSynchronize());
|
||||||
|
|
||||||
|
int out = 0;
|
||||||
|
HIP_CHECK(hipMemcpy(&out, d_out, sizeof(int), hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
HIP_CHECK(hipFree(d_in));
|
||||||
|
HIP_CHECK(hipFree(d_out));
|
||||||
|
REQUIRE(out == ((N * (N + 1)) / 2));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,6 +7,7 @@ set(TEST_SRC
|
|||||||
hipDrvGetErrorString.cc
|
hipDrvGetErrorString.cc
|
||||||
hipGetLastError.cc
|
hipGetLastError.cc
|
||||||
hipPeekAtLastError.cc
|
hipPeekAtLastError.cc
|
||||||
|
hipDynamicLogging.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
if(UNIX)
|
if(UNIX)
|
||||||
|
|||||||
@@ -0,0 +1,134 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <streambuf>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#include <io.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#define dup _dup
|
||||||
|
#define dup2 _dup2
|
||||||
|
#define fd_close _close
|
||||||
|
#define unlink _unlink
|
||||||
|
#define STDERR_FD _fileno(stderr)
|
||||||
|
#define OPEN_FLAGS (_O_WRONLY | _O_CREAT | _O_TRUNC)
|
||||||
|
#define OPEN_MODE (_S_IREAD | _S_IWRITE)
|
||||||
|
#define open _open
|
||||||
|
#else
|
||||||
|
#include <unistd.h>
|
||||||
|
#define fd_close close
|
||||||
|
#define STDERR_FD STDERR_FILENO
|
||||||
|
#define OPEN_FLAGS (O_WRONLY | O_CREAT | O_TRUNC)
|
||||||
|
#define OPEN_MODE 0644
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Class to capture all stderr output (HIP logging uses stderr)
|
||||||
|
class OutCapture {
|
||||||
|
private:
|
||||||
|
std::stringstream captured_stream_;
|
||||||
|
std::streambuf* cerr_backup_;
|
||||||
|
int stderr_backup_;
|
||||||
|
std::string temp_file_;
|
||||||
|
|
||||||
|
static std::string getTempFilePath() {
|
||||||
|
#ifdef _WIN32
|
||||||
|
char temp_path[MAX_PATH];
|
||||||
|
if (GetTempPathA(MAX_PATH, temp_path)) {
|
||||||
|
return std::string(temp_path) + "hip_stderr_capture.txt";
|
||||||
|
}
|
||||||
|
// Fallback to current directory
|
||||||
|
return "hip_stderr_capture.txt";
|
||||||
|
#else
|
||||||
|
return "/tmp/hip_stderr_capture.txt";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
OutCapture() : temp_file_(getTempFilePath()) {
|
||||||
|
// Backup original cerr stream buffer (HIP logging uses stderr)
|
||||||
|
cerr_backup_ = std::cerr.rdbuf();
|
||||||
|
|
||||||
|
// Backup original stderr file descriptor
|
||||||
|
stderr_backup_ = dup(STDERR_FD);
|
||||||
|
}
|
||||||
|
|
||||||
|
void startCapture() {
|
||||||
|
// Clear any previous content
|
||||||
|
captured_stream_.str("");
|
||||||
|
captured_stream_.clear();
|
||||||
|
|
||||||
|
// Redirect std::cerr to our stringstream
|
||||||
|
std::cerr.rdbuf(captured_stream_.rdbuf());
|
||||||
|
|
||||||
|
// Redirect stderr file descriptor to temp file (for fprintf to stderr)
|
||||||
|
int temp_fd = open(temp_file_.c_str(), OPEN_FLAGS, OPEN_MODE);
|
||||||
|
if (temp_fd != -1) {
|
||||||
|
dup2(temp_fd, STDERR_FD);
|
||||||
|
fd_close(temp_fd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string stopCapture() {
|
||||||
|
// Restore original cerr stream
|
||||||
|
std::cerr.rdbuf(cerr_backup_);
|
||||||
|
|
||||||
|
// Restore original stderr file descriptor
|
||||||
|
dup2(stderr_backup_, STDERR_FD);
|
||||||
|
|
||||||
|
// Read from temp file (captures fprintf(stderr) output from HIP logging)
|
||||||
|
std::ifstream temp_file(temp_file_);
|
||||||
|
std::string file_content;
|
||||||
|
if (temp_file.is_open()) {
|
||||||
|
std::string line;
|
||||||
|
while (std::getline(temp_file, line)) {
|
||||||
|
file_content += line + "\n";
|
||||||
|
}
|
||||||
|
temp_file.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine both captures: C++ streams and file descriptor output
|
||||||
|
std::string stream_content = captured_stream_.str();
|
||||||
|
std::string total_output = stream_content + file_content;
|
||||||
|
|
||||||
|
// Clean up temp file
|
||||||
|
unlink(temp_file_.c_str());
|
||||||
|
|
||||||
|
return total_output;
|
||||||
|
}
|
||||||
|
|
||||||
|
~OutCapture() {
|
||||||
|
// Ensure everything is restored
|
||||||
|
std::cerr.rdbuf(cerr_backup_);
|
||||||
|
dup2(stderr_backup_, STDERR_FD);
|
||||||
|
fd_close(stderr_backup_);
|
||||||
|
unlink(temp_file_.c_str());
|
||||||
|
}
|
||||||
|
};
|
||||||
@@ -0,0 +1,156 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <hip_test_common.hh>
|
||||||
|
#include "OutCapture.hh"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @addtogroup hipDynamicLogging hipDynamicLogging
|
||||||
|
* @{
|
||||||
|
* @ingroup ErrorTest
|
||||||
|
* `hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask)` -
|
||||||
|
* Sets logging parameters for HIP runtime.
|
||||||
|
* `hipExtEnableLogging()` -
|
||||||
|
* Enables HIP runtime logging.
|
||||||
|
* `hipExtDisableLogging()` -
|
||||||
|
* Disables HIP runtime logging.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static bool hipDynamicLoggingTest() {
|
||||||
|
// Create output capture instance
|
||||||
|
OutCapture capture;
|
||||||
|
capture.startCapture();
|
||||||
|
|
||||||
|
// Set Logging params
|
||||||
|
HIP_CHECK(hipExtSetLoggingParams(4, 0, -1));
|
||||||
|
|
||||||
|
// Logging is disabled here - allocate memory
|
||||||
|
int* dptr = nullptr;
|
||||||
|
HIP_CHECK(hipMalloc(&dptr, sizeof(int)));
|
||||||
|
|
||||||
|
// Stop capture after hipMalloc and check no output (logging disabled)
|
||||||
|
std::string malloc_output = capture.stopCapture();
|
||||||
|
if (malloc_output.size() != 0) {
|
||||||
|
INFO("Unexpected logging output during hipMalloc (logging should be disabled): " << malloc_output);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start capture before enabling logging
|
||||||
|
capture.startCapture();
|
||||||
|
|
||||||
|
// Enable logging and do memset
|
||||||
|
HIP_CHECK(hipExtEnableLogging());
|
||||||
|
HIP_CHECK(hipMemset(dptr, 0x00, sizeof(int)));
|
||||||
|
|
||||||
|
// Disable logging
|
||||||
|
HIP_CHECK(hipExtDisableLogging());
|
||||||
|
|
||||||
|
// Stop capture after disabling logging and check for output
|
||||||
|
std::string logging_output = capture.stopCapture();
|
||||||
|
if (logging_output.size() == 0) {
|
||||||
|
INFO("Expected logging output during enabled logging period, but got none");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
HIP_CHECK(hipFree(dptr));
|
||||||
|
|
||||||
|
INFO("Successfully captured HIP logging output (" << logging_output.size() << " bytes)");
|
||||||
|
INFO("Logging output: " << logging_output);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Description
|
||||||
|
* ------------------------
|
||||||
|
* - Validates that HIP dynamic logging works correctly:
|
||||||
|
* 1. No output when logging is disabled
|
||||||
|
* 2. Logging output is captured when logging is enabled
|
||||||
|
* 3. hipMemset operation produces logging output during enabled period
|
||||||
|
* Test source
|
||||||
|
* ------------------------
|
||||||
|
* - unit/errorHandling/hipDynamicLogging.cc
|
||||||
|
* Test requirements
|
||||||
|
* ------------------------
|
||||||
|
* - HIP_VERSION >= 5.6
|
||||||
|
*/
|
||||||
|
TEST_CASE("Unit_hipDynamicLogging_Positive_Basic") {
|
||||||
|
int numDevices = 0;
|
||||||
|
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||||
|
|
||||||
|
if (numDevices <= 0) {
|
||||||
|
HipTest::HIP_SKIP_TEST("Skipping hipDynamicLogging test - no devices available");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
REQUIRE(hipDynamicLoggingTest() == true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Description
|
||||||
|
* ------------------------
|
||||||
|
* - Validates that hipExtSetLoggingParams sets logging parameters correctly
|
||||||
|
* and that logging can be enabled/disabled multiple times
|
||||||
|
* Test source
|
||||||
|
* ------------------------
|
||||||
|
* - unit/errorHandling/hipDynamicLogging.cc
|
||||||
|
* Test requirements
|
||||||
|
* ------------------------
|
||||||
|
* - HIP_VERSION >= 5.6
|
||||||
|
*/
|
||||||
|
TEST_CASE("Unit_hipDynamicLogging_Positive_MultipleEnableDisable") {
|
||||||
|
int numDevices = 0;
|
||||||
|
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||||
|
|
||||||
|
if (numDevices <= 0) {
|
||||||
|
HipTest::HIP_SKIP_TEST("Skipping hipDynamicLogging test - no devices available");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test multiple enable/disable cycles
|
||||||
|
OutCapture capture;
|
||||||
|
int* dptr = nullptr;
|
||||||
|
HIP_CHECK(hipMalloc(&dptr, sizeof(int)));
|
||||||
|
|
||||||
|
// Set different logging parameters
|
||||||
|
HIP_CHECK(hipExtSetLoggingParams(3, 0, -1));
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
// Start capture and enable logging
|
||||||
|
capture.startCapture();
|
||||||
|
HIP_CHECK(hipExtEnableLogging());
|
||||||
|
HIP_CHECK(hipMemset(dptr, 0x42, sizeof(int)));
|
||||||
|
HIP_CHECK(hipExtDisableLogging());
|
||||||
|
|
||||||
|
// Check that we captured some output
|
||||||
|
std::string output = capture.stopCapture();
|
||||||
|
REQUIRE(output.size() > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
HIP_CHECK(hipFree(dptr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* End doxygen group ErrorTest.
|
||||||
|
* @}
|
||||||
|
*/
|
||||||
@@ -45,7 +45,7 @@ MATH_UNARY_HP_KERNEL_DEF(hcos);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hcos, static_cast<float (*)(float)>(std::cos),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hcos, static_cast<float (*)(float)>(std::cos),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2cos);
|
MATH_UNARY_HP_KERNEL_DEF(h2cos);
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2cos);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2cos, static_cast<float (*)(float)>(std::cos),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2cos, static_cast<float (*)(float)>(std::cos),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hsin);
|
MATH_UNARY_HP_KERNEL_DEF(hsin);
|
||||||
@@ -82,7 +82,7 @@ MATH_UNARY_HP_KERNEL_DEF(hsin);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hsin, static_cast<float (*)(float)>(std::sin),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hsin, static_cast<float (*)(float)>(std::sin),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2sin);
|
MATH_UNARY_HP_KERNEL_DEF(h2sin);
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2sin);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2sin, static_cast<float (*)(float)>(std::sin),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2sin, static_cast<float (*)(float)>(std::sin),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hexp);
|
MATH_UNARY_HP_KERNEL_DEF(hexp);
|
||||||
@@ -119,7 +119,7 @@ MATH_UNARY_HP_KERNEL_DEF(hexp);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hexp, static_cast<float (*)(float)>(std::exp),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hexp, static_cast<float (*)(float)>(std::exp),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2exp);
|
MATH_UNARY_HP_KERNEL_DEF(h2exp);
|
||||||
|
|
||||||
@@ -137,7 +137,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2exp);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp, static_cast<float (*)(float)>(std::exp),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp, static_cast<float (*)(float)>(std::exp),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hexp10);
|
MATH_UNARY_HP_KERNEL_DEF(hexp10);
|
||||||
@@ -156,7 +156,7 @@ MATH_UNARY_HP_KERNEL_DEF(hexp10);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hexp10, static_cast<float (*)(float)>(exp10f),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hexp10, static_cast<float (*)(float)>(exp10f),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2exp10);
|
MATH_UNARY_HP_KERNEL_DEF(h2exp10);
|
||||||
|
|
||||||
@@ -174,7 +174,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2exp10);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp10, static_cast<float (*)(float)>(exp10f),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp10, static_cast<float (*)(float)>(exp10f),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hexp2);
|
MATH_UNARY_HP_KERNEL_DEF(hexp2);
|
||||||
@@ -193,7 +193,7 @@ MATH_UNARY_HP_KERNEL_DEF(hexp2);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hexp2, static_cast<float (*)(float)>(std::exp2),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hexp2, static_cast<float (*)(float)>(std::exp2),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2exp2);
|
MATH_UNARY_HP_KERNEL_DEF(h2exp2);
|
||||||
|
|
||||||
@@ -211,7 +211,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2exp2);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp2, static_cast<float (*)(float)>(std::exp2),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2exp2, static_cast<float (*)(float)>(std::exp2),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hlog);
|
MATH_UNARY_HP_KERNEL_DEF(hlog);
|
||||||
@@ -230,7 +230,7 @@ MATH_UNARY_HP_KERNEL_DEF(hlog);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hlog, static_cast<float (*)(float)>(std::log),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hlog, static_cast<float (*)(float)>(std::log),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2log);
|
MATH_UNARY_HP_KERNEL_DEF(h2log);
|
||||||
|
|
||||||
@@ -248,7 +248,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2log);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2log, static_cast<float (*)(float)>(std::log),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2log, static_cast<float (*)(float)>(std::log),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hlog10);
|
MATH_UNARY_HP_KERNEL_DEF(hlog10);
|
||||||
@@ -267,7 +267,7 @@ MATH_UNARY_HP_KERNEL_DEF(hlog10);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hlog10, static_cast<float (*)(float)>(std::log10),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hlog10, static_cast<float (*)(float)>(std::log10),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2log10);
|
MATH_UNARY_HP_KERNEL_DEF(h2log10);
|
||||||
|
|
||||||
@@ -285,7 +285,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2log10);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2log10, static_cast<float (*)(float)>(std::log10),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2log10, static_cast<float (*)(float)>(std::log10),
|
||||||
ULPValidatorBuilderFactory<float>(2));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hlog2);
|
MATH_UNARY_HP_KERNEL_DEF(hlog2);
|
||||||
@@ -304,7 +304,7 @@ MATH_UNARY_HP_KERNEL_DEF(hlog2);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hlog2, static_cast<float (*)(float)>(std::log2),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hlog2, static_cast<float (*)(float)>(std::log2),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2log2);
|
MATH_UNARY_HP_KERNEL_DEF(h2log2);
|
||||||
|
|
||||||
@@ -322,7 +322,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2log2);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2log2, static_cast<float (*)(float)>(std::log2),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2log2, static_cast<float (*)(float)>(std::log2),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hsqrt);
|
MATH_UNARY_HP_KERNEL_DEF(hsqrt);
|
||||||
@@ -341,7 +341,7 @@ MATH_UNARY_HP_KERNEL_DEF(hsqrt);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(hsqrt, static_cast<float (*)(float)>(std::sqrt),
|
MATH_UNARY_HP_TEST_DEF_IMPL(hsqrt, static_cast<float (*)(float)>(std::sqrt),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(h2sqrt);
|
MATH_UNARY_HP_KERNEL_DEF(h2sqrt);
|
||||||
|
|
||||||
@@ -359,7 +359,7 @@ MATH_UNARY_HP_KERNEL_DEF(h2sqrt);
|
|||||||
* - HIP_VERSION >= 5.2
|
* - HIP_VERSION >= 5.2
|
||||||
*/
|
*/
|
||||||
MATH_UNARY_HP_TEST_DEF_IMPL(h2sqrt, static_cast<float (*)(float)>(std::sqrt),
|
MATH_UNARY_HP_TEST_DEF_IMPL(h2sqrt, static_cast<float (*)(float)>(std::sqrt),
|
||||||
ULPValidatorBuilderFactory<float>(1));
|
ULPValidatorBuilderFactory<Float16>(1));
|
||||||
|
|
||||||
|
|
||||||
MATH_UNARY_HP_KERNEL_DEF(hceil);
|
MATH_UNARY_HP_KERNEL_DEF(hceil);
|
||||||
|
|||||||
@@ -187,7 +187,9 @@ template <typename T, typename... Ts> class MathTest {
|
|||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "Input value(s): " << std::scientific
|
ss << "Input value(s): " << std::scientific
|
||||||
<< std::setprecision(std::numeric_limits<T>::max_digits10 - 1);
|
<< std::setprecision(std::numeric_limits<T>::max_digits10 - 1);
|
||||||
((ss << " " << args), ...) << "\n" << actual_val << " ";
|
((ss << " " << args), ...) << "\n"
|
||||||
|
<< "Output value: " << actual_val << "\n"
|
||||||
|
<< "Condition failed: ";
|
||||||
|
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,6 +25,12 @@ THE SOFTWARE.
|
|||||||
#include <catch2/catch_all.hpp>
|
#include <catch2/catch_all.hpp>
|
||||||
#include <catch2/matchers/catch_matchers_floating_point.hpp>
|
#include <catch2/matchers/catch_matchers_floating_point.hpp>
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#include "Float16.hh"
|
||||||
|
|
||||||
// Define a new MatcherBase class with a public 'describe' member function because
|
// Define a new MatcherBase class with a public 'describe' member function because
|
||||||
// Catch::MatcherBase::describe is protected and thus can't be used via a pointer to
|
// Catch::MatcherBase::describe is protected and thus can't be used via a pointer to
|
||||||
// Catch::MatcherBase.
|
// Catch::MatcherBase.
|
||||||
@@ -61,6 +67,113 @@ template <typename T, typename Matcher> class ValidatorBase : public MatcherBase
|
|||||||
bool nan = false;
|
bool nan = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Float16WithinUlpsMatcher : MatcherBase<Float16> {
|
||||||
|
Float16WithinUlpsMatcher(Float16 target, uint64_t ulps) : m_target(target), m_ulps(ulps) {}
|
||||||
|
|
||||||
|
bool match(Float16 const& matchee) const override {
|
||||||
|
// Comparison with NaN should always be false.
|
||||||
|
// This way we can rule it out before getting into the ugly details
|
||||||
|
if (__hisnan(matchee) || __hisnan(m_target)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto value_bits = convertFloat16toInt16(matchee);
|
||||||
|
auto target_bits = convertFloat16toInt16(m_target);
|
||||||
|
|
||||||
|
// If signs differ, handle the special +0 vs -0 case explicitly.
|
||||||
|
if ((value_bits < 0) != (target_bits < 0)) {
|
||||||
|
return matchee == m_target;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ulp_diff = std::abs(value_bits - target_bits);
|
||||||
|
return static_cast<uint64_t>(ulp_diff) <= m_ulps;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string describe() const override {
|
||||||
|
std::stringstream ret;
|
||||||
|
|
||||||
|
ret << "is within " << m_ulps << " ULPs of ";
|
||||||
|
|
||||||
|
write(ret, m_target);
|
||||||
|
ret << 'f';
|
||||||
|
ret << " ([";
|
||||||
|
|
||||||
|
write(ret, step(m_target, -FLOAT16_MAX, m_ulps));
|
||||||
|
ret << ", ";
|
||||||
|
write(ret, step(m_target, FLOAT16_MAX, m_ulps));
|
||||||
|
|
||||||
|
ret << "])";
|
||||||
|
|
||||||
|
return ret.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Float16 getNextAfter(Float16 from, Float16 direction) const {
|
||||||
|
constexpr int16_t signbit_float16 = 0x8000;
|
||||||
|
|
||||||
|
// Encode inputs as 16-bit integers
|
||||||
|
const int16_t from_bits = convertFloat16toInt16(from);
|
||||||
|
const int16_t direction_bits = convertFloat16toInt16(direction);
|
||||||
|
|
||||||
|
// Special cases
|
||||||
|
if (from_bits == direction_bits) return direction_bits;
|
||||||
|
if (std::abs(from_bits) == static_cast<int16_t>(0) &&
|
||||||
|
std::abs(direction_bits) == static_cast<int16_t>(0))
|
||||||
|
return direction;
|
||||||
|
|
||||||
|
// Makes integer comparisons reflect numeric ordering across sign.
|
||||||
|
const int16_t from_ordered = (from_bits < 0) ? signbit_float16 - from_bits : from_bits;
|
||||||
|
const int16_t direction_ordered =
|
||||||
|
(direction_bits < 0) ? signbit_float16 - direction_bits : direction_bits;
|
||||||
|
|
||||||
|
// Decide whether to move up or down by one ULP
|
||||||
|
const int16_t step = (from_ordered < direction_ordered) ? 1 : -1;
|
||||||
|
|
||||||
|
// Take one step
|
||||||
|
const int16_t after_step_ordered = from_ordered + step;
|
||||||
|
|
||||||
|
// Map back from ordered space to raw Float16 bits.
|
||||||
|
int16_t next_bits =
|
||||||
|
(after_step_ordered < 0) ? signbit_float16 - after_step_ordered : after_step_ordered;
|
||||||
|
|
||||||
|
// Handle boundary behavior for the most-negative edge case.
|
||||||
|
if (from_ordered == -1 && (from_ordered < direction_ordered)) {
|
||||||
|
next_bits = signbit_float16;
|
||||||
|
}
|
||||||
|
|
||||||
|
return convertInt16toFloat16(next_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
Float16 step(Float16 start, Float16 direction, uint64_t steps) const {
|
||||||
|
Float16 result = start;
|
||||||
|
for (uint64_t i = 0; i < steps; ++i) {
|
||||||
|
result = getNextAfter(result, direction);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(std::ostream& out, Float16 num) const {
|
||||||
|
const uint32_t float16_max_digits = 5;
|
||||||
|
out << std::scientific << std::setprecision(float16_max_digits) << num;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Float16 convertInt16toFloat16(int16_t d) {
|
||||||
|
Float16 i;
|
||||||
|
std::memcpy(&i, &d, sizeof(int16_t));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int16_t convertFloat16toInt16(Float16 d) {
|
||||||
|
uint16_t i;
|
||||||
|
std::memcpy(&i, &d, sizeof(Float16));
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
Float16 m_target;
|
||||||
|
uint64_t m_ulps;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> auto ULPValidatorBuilderFactory(int64_t ulps) {
|
template <typename T> auto ULPValidatorBuilderFactory(int64_t ulps) {
|
||||||
return [=](T target, auto&&...) {
|
return [=](T target, auto&&...) {
|
||||||
return std::make_unique<ValidatorBase<T, Catch::Matchers::WithinUlpsMatcher>>(
|
return std::make_unique<ValidatorBase<T, Catch::Matchers::WithinUlpsMatcher>>(
|
||||||
@@ -68,6 +181,13 @@ template <typename T> auto ULPValidatorBuilderFactory(int64_t ulps) {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <> inline auto ULPValidatorBuilderFactory<Float16>(int64_t ulps) {
|
||||||
|
return [=](Float16 target, auto&&...) {
|
||||||
|
return std::make_unique<ValidatorBase<Float16, Float16WithinUlpsMatcher>>(
|
||||||
|
target, Float16WithinUlpsMatcher(target, ulps));
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template <typename T> auto AbsValidatorBuilderFactory(double margin) {
|
template <typename T> auto AbsValidatorBuilderFactory(double margin) {
|
||||||
return [=](T target, auto&&...) {
|
return [=](T target, auto&&...) {
|
||||||
return std::make_unique<ValidatorBase<T, Catch::Matchers::WithinAbsMatcher>>(
|
return std::make_unique<ValidatorBase<T, Catch::Matchers::WithinAbsMatcher>>(
|
||||||
@@ -96,7 +216,7 @@ template <typename T> class EqValidator : public MatcherBase<T> {
|
|||||||
|
|
||||||
std::string describe() const override {
|
std::string describe() const override {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << " is not equal to " << target_;
|
ss << "is equal to " << target_;
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -177,8 +177,8 @@ static bool checkhipMemPoolTrimTo(hipStream_t stream, int N, int dev = 0) {
|
|||||||
testObj.transferFromMempool(stream);
|
testObj.transferFromMempool(stream);
|
||||||
testObj.freeDevBuf(stream);
|
testObj.freeDevBuf(stream);
|
||||||
// verify and validate
|
// verify and validate
|
||||||
REQUIRE(true == testObj.validateResult());
|
|
||||||
HIP_CHECK(hipStreamSynchronize(stream));
|
HIP_CHECK(hipStreamSynchronize(stream));
|
||||||
|
REQUIRE(true == testObj.validateResult());
|
||||||
}
|
}
|
||||||
HIP_CHECK(hipMemPoolDestroy(mem_pool));
|
HIP_CHECK(hipMemPoolDestroy(mem_pool));
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -494,7 +494,6 @@ HIP doesn't support the following CUDA functions/operators in ``cooperative_grou
|
|||||||
* ``synchronize``
|
* ``synchronize``
|
||||||
* ``memcpy_async``
|
* ``memcpy_async``
|
||||||
* ``wait`` and ``wait_prior``
|
* ``wait`` and ``wait_prior``
|
||||||
* ``barrier_arrive`` and ``barrier_wait``
|
|
||||||
* ``invoke_one`` and ``invoke_one_broadcast``
|
* ``invoke_one`` and ``invoke_one_broadcast``
|
||||||
* ``reduce``
|
* ``reduce``
|
||||||
* ``reduce_update_async`` and ``reduce_store_async``
|
* ``reduce_update_async`` and ``reduce_store_async``
|
||||||
|
|||||||
@@ -9615,6 +9615,45 @@ hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
|
|||||||
/**
|
/**
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Enable HIP runtime logging.
|
||||||
|
*
|
||||||
|
* This function enables the HIP runtime logging mechanism, allowing diagnostic
|
||||||
|
* and trace information to be captured during HIP API execution.
|
||||||
|
*
|
||||||
|
* @returns #hipSuccess
|
||||||
|
*
|
||||||
|
* @see hipExtDisableLogging, hipExtSetLoggingParams
|
||||||
|
*/
|
||||||
|
hipError_t hipExtEnableLogging();
|
||||||
|
/**
|
||||||
|
* @brief Disable HIP runtime logging.
|
||||||
|
*
|
||||||
|
* This function disables the HIP runtime logging mechanism, stopping the capture
|
||||||
|
* of diagnostic and trace information during HIP API execution.
|
||||||
|
*
|
||||||
|
* @returns #hipSuccess
|
||||||
|
*
|
||||||
|
* @see hipExtEnableLogging, hipExtSetLoggingParams
|
||||||
|
*/
|
||||||
|
hipError_t hipExtDisableLogging();
|
||||||
|
/**
|
||||||
|
* @brief Set HIP runtime logging parameters.
|
||||||
|
*
|
||||||
|
* This function configures the logging behavior of the HIP runtime, including
|
||||||
|
* the verbosity level, buffer size, and which components to log.
|
||||||
|
*
|
||||||
|
* @param [in] log_level The logging verbosity level. Higher values produce more detailed output.
|
||||||
|
* @param [in] log_size Reserved for future use. Currently not implemented.
|
||||||
|
* @param [in] log_mask A bitmask specifying which HIP runtime components to log.
|
||||||
|
*
|
||||||
|
* @returns #hipSuccess, #hipErrorInvalidValue
|
||||||
|
*
|
||||||
|
* @see hipExtEnableLogging, hipExtDisableLogging
|
||||||
|
*/
|
||||||
|
hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "c" */
|
} /* extern "c" */
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ default_stages: [pre-commit]
|
|||||||
fail_fast: true
|
fail_fast: true
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v5.0.0
|
rev: v6.0.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
@@ -12,7 +12,7 @@ repos:
|
|||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility
|
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility
|
||||||
# for the latest ruff version supported by the hook.
|
# for the latest ruff version supported by the hook.
|
||||||
rev: v0.12.12
|
rev: v0.14.11
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff-check
|
- id: ruff-check
|
||||||
args: [--fix]
|
args: [--fix]
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
|||||||
|
|
||||||
* Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes
|
* Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes
|
||||||
|
|
||||||
|
* Fix issue where dispatch filtering in a range (e.g. >2) was not working
|
||||||
|
|
||||||
|
* Fix redundant warnings for compute/memory partition not found for < MI 300 series GPUs by skipping partition checks
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
|
|
||||||
* Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter.
|
* Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter.
|
||||||
|
|||||||
@@ -4,14 +4,13 @@
|
|||||||
|
|
||||||
ROCm Compute Profiler is a system performance profiling tool for machine
|
ROCm Compute Profiler is a system performance profiling tool for machine
|
||||||
learning/HPC workloads running on AMD MI GPUs. The tool presently
|
learning/HPC workloads running on AMD MI GPUs. The tool presently
|
||||||
targets usage on MI100, MI200, and MI300 accelerators.
|
targets usage on MI100, MI200, MI300, and MI350 series accelerators.
|
||||||
|
|
||||||
* For more information on available features, installation steps, and
|
* For more information on available features, installation steps, and
|
||||||
workload profiling and analysis, please refer to the online
|
workload profiling and analysis, please refer to the online
|
||||||
[documentation](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/).
|
[documentation](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/).
|
||||||
|
|
||||||
* ROCm Compute Profiler is an AMD open source research project and is not supported
|
* ROCm Compute Profiler is an AMD open source tool that is part of the ROCm software stack. We welcome contributions and
|
||||||
as part of the ROCm software stack. We welcome contributions and
|
|
||||||
feedback from the community. Please see the
|
feedback from the community. Please see the
|
||||||
[CONTRIBUTING.md](CONTRIBUTING.md) file for additional details on our
|
[CONTRIBUTING.md](CONTRIBUTING.md) file for additional details on our
|
||||||
contribution process.
|
contribution process.
|
||||||
@@ -39,8 +38,8 @@ python3 -m pip install -r requirements.txt
|
|||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
Populate the <usename> variable in `docker/docker-compose.customrocmtest.yml`.
|
Populate the <username> variable in `docker/docker-compose.customrocmtest.yml`.
|
||||||
Populate the <rocm_build_image> variable in `docker/Dockerfile.customrocmtest` based on latest ROCm CI build information.
|
Populate the <tarball_name> variable in `docker/Dockerfile.customrocmtest` based on latest TheRock nightly build information.
|
||||||
|
|
||||||
To quickly get the environment (bash shell) for building and testing, run the following commands:
|
To quickly get the environment (bash shell) for building and testing, run the following commands:
|
||||||
* `cd docker`
|
* `cd docker`
|
||||||
@@ -115,7 +114,7 @@ This software can be cited using a Zenodo
|
|||||||
style reference is provided below for convenience:
|
style reference is provided below for convenience:
|
||||||
|
|
||||||
```
|
```
|
||||||
@software{xiaomin_lu_2022_7314631
|
@misc{xiaomin_lu_2022_7314631
|
||||||
author = {Xiaomin Lu and
|
author = {Xiaomin Lu and
|
||||||
Cole Ramos and
|
Cole Ramos and
|
||||||
Fei Zheng and
|
Fei Zheng and
|
||||||
@@ -124,12 +123,7 @@ style reference is provided below for convenience:
|
|||||||
Keith Lowery and
|
Keith Lowery and
|
||||||
Nicholas Curtis and
|
Nicholas Curtis and
|
||||||
Cristian Di Pietrantonio},
|
Cristian Di Pietrantonio},
|
||||||
title = {ROCm/rocprofiler-compute: v3.1.0 (12 February 2025)},
|
title = {rocprofiler-compute},
|
||||||
month = February,
|
url = {https://github.com/ROCm/rocm-systems/blob/develop/projects/rocprofiler-compute}
|
||||||
year = 2025,
|
|
||||||
publisher = {Zenodo},
|
|
||||||
version = {v3.1.0},
|
|
||||||
doi = {10.5281/zenodo.7314631},
|
|
||||||
url = {https://doi.org/10.5281/zenodo.7314631}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,32 +1,52 @@
|
|||||||
# Use a base image
|
# Use a base image
|
||||||
FROM <rocm_build_image>
|
FROM ubuntu:22.04
|
||||||
|
|
||||||
# Set the working directory
|
# Install curl first (needed for ROCm download)
|
||||||
WORKDIR /app
|
RUN apt-get update && apt-get install -y curl
|
||||||
|
|
||||||
|
# Define the tarball name as a variable
|
||||||
|
# Check https://therock-nightly-tarball.s3.amazonaws.com/index.html for latest builds
|
||||||
|
# Use therock-dist-linux-gfx<arch>-dcgpu-<rocm-version>.tar.gz naming convention
|
||||||
|
ARG TARBALL_NAME=<tarball_name>
|
||||||
|
|
||||||
|
# Install ROCm from TheRock Nightly build
|
||||||
|
RUN mkdir -p /rocm && \
|
||||||
|
curl -fLO https://therock-nightly-tarball.s3.amazonaws.com/${TARBALL_NAME} && \
|
||||||
|
tar -xf ${TARBALL_NAME} -C /rocm && \
|
||||||
|
rm ${TARBALL_NAME}
|
||||||
|
|
||||||
|
# Set environment variables for ROCm
|
||||||
|
ENV PATH="/rocm/bin:${PATH}" \
|
||||||
|
ROCM_PATH="/rocm" \
|
||||||
|
LD_LIBRARY_PATH="/rocm/lib:/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH}" \
|
||||||
|
HIP_DEVICE_LIB_PATH="/rocm/llvm/amdgcn/bitcode" \
|
||||||
|
HIP_PLATFORM=amd
|
||||||
|
|
||||||
# Update package list and install prerequisites
|
# Update package list and install prerequisites
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
software-properties-common cmake locales git curl \
|
software-properties-common cmake locales git \
|
||||||
&& add-apt-repository ppa:deadsnakes/ppa \
|
&& add-apt-repository ppa:deadsnakes/ppa \
|
||||||
&& apt-get update
|
&& apt-get update
|
||||||
|
|
||||||
# Allows running git commands in /app
|
|
||||||
RUN git config --global --add safe.directory /app
|
|
||||||
|
|
||||||
# Generate the desired locale
|
# Generate the desired locale
|
||||||
RUN locale-gen en_US.UTF-8
|
RUN locale-gen en_US.UTF-8
|
||||||
|
|
||||||
# Install Python 3.10 and pip
|
# Install Python 3.10 and pip
|
||||||
RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip libsqlite3-dev
|
RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip libsqlite3-dev
|
||||||
RUN python3.10 -m venv venv
|
RUN python3.10 -m venv /venv
|
||||||
ENV PATH="venv/bin:$PATH"
|
ENV PATH="/venv/bin:$PATH"
|
||||||
RUN python -m pip install --upgrade pip
|
RUN python -m pip install --upgrade pip
|
||||||
|
|
||||||
# Install any dependencies specified in requirements.txt
|
# Install any rocprofiler-compute dependencies specified in requirements.txt
|
||||||
WORKDIR /app/projects/rocprofiler-compute
|
|
||||||
COPY projects/rocprofiler-compute/requirements.txt /app/projects/rocprofiler-compute/requirements.txt
|
COPY projects/rocprofiler-compute/requirements.txt /app/projects/rocprofiler-compute/requirements.txt
|
||||||
COPY projects/rocprofiler-compute/requirements-test.txt /app/projects/rocprofiler-compute/requirements-test.txt
|
COPY projects/rocprofiler-compute/requirements-test.txt /app/projects/rocprofiler-compute/requirements-test.txt
|
||||||
RUN python -m pip install -r requirements.txt -r requirements-test.txt
|
RUN python -m pip install -r /app/projects/rocprofiler-compute/requirements.txt -r /app/projects/rocprofiler-compute/requirements-test.txt
|
||||||
|
|
||||||
|
# Set the working directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Allows running git commands in /app
|
||||||
|
RUN git config --global --add safe.directory /app
|
||||||
|
|
||||||
# Run interactive bash shell
|
# Run interactive bash shell
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
|||||||
Binary file not shown.
|
Före Bredd: | Höjd: | Storlek: 185 KiB Efter Bredd: | Höjd: | Storlek: 254 KiB |
Binary file not shown.
|
Före Bredd: | Höjd: | Storlek: 34 KiB Efter Bredd: | Höjd: | Storlek: 240 KiB |
@@ -15,7 +15,7 @@ This section provides an overview of ROCm Compute Profiler's CLI analysis featur
|
|||||||
* :ref:`Metric customization <cli-analysis-options>`: Isolate a subset of built-in metrics or build your own profiling configuration.
|
* :ref:`Metric customization <cli-analysis-options>`: Isolate a subset of built-in metrics or build your own profiling configuration.
|
||||||
|
|
||||||
* :ref:`Filtering <cli-analysis-options>`: Hone in on a particular kernel, GPU ID, or dispatch ID via post-process filtering.
|
* :ref:`Filtering <cli-analysis-options>`: Hone in on a particular kernel, GPU ID, or dispatch ID via post-process filtering.
|
||||||
|
|
||||||
* :ref:`Per-kernel roofline analysis <per-kernel-roofline>`: Detailed arithmetic intensity and performance analysis for individual kernels.
|
* :ref:`Per-kernel roofline analysis <per-kernel-roofline>`: Detailed arithmetic intensity and performance analysis for individual kernels.
|
||||||
|
|
||||||
Run ``rocprof-compute analyze -h`` for more details.
|
Run ``rocprof-compute analyze -h`` for more details.
|
||||||
@@ -346,6 +346,7 @@ Show System Speed-of-Light and CS_Busy blocks only
|
|||||||
this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for
|
this case, ``1`` is the ID for System Speed-of-Light and ``5.1.0`` the ID for
|
||||||
GPU Busy Cycles metric.
|
GPU Busy Cycles metric.
|
||||||
|
|
||||||
|
|
||||||
Filter kernels
|
Filter kernels
|
||||||
First, list the top kernels in your application using `--list-stats`.
|
First, list the top kernels in your application using `--list-stats`.
|
||||||
|
|
||||||
@@ -534,36 +535,40 @@ Analysis database example
|
|||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
$ rocprof-compute analyze --verbose --db test -p workloads/vmem/MI300X_A1 -p workloads/vmem1/MI300X_A1
|
$ rocprof-compute analyze --verbose --output-name test --output-format db -p workloads/nbody/MI300X_A1 -p workloads/nbody1/MI300X_A1
|
||||||
DEBUG Execution mode = analyze
|
DEBUG Execution mode = analyze
|
||||||
|
|
||||||
__ _
|
__ _
|
||||||
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
|
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
|
||||||
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
|
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
|
||||||
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|
||||||
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|
||||||
|_| |_|
|
|_| |_|
|
||||||
|
|
||||||
INFO Analysis mode = db
|
INFO Analysis mode = db
|
||||||
DEBUG [omnisoc init]
|
INFO ed45b0b189
|
||||||
DEBUG [omnisoc init]
|
DEBUG [omnisoc init]
|
||||||
DEBUG [analysis] prepping to do some analysis
|
INFO ed45b0b189
|
||||||
INFO [analysis] deriving rocprofiler-compute metrics...
|
DEBUG [omnisoc init]
|
||||||
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
|
DEBUG [analysis] prepping to do some analysis
|
||||||
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
|
INFO [analysis] deriving rocprofiler-compute metrics...
|
||||||
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
|
DEBUG Collected roofline ceilings
|
||||||
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
|
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody/MI300X_A1.
|
||||||
DEBUG Collected dispatch data
|
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody1/MI300X_A1.
|
||||||
DEBUG Applied analysis mode filters
|
DEBUG Collected dispatch data
|
||||||
DEBUG Calculated dispatch data
|
DEBUG Applied analysis mode filters
|
||||||
DEBUG Collected metrics data
|
DEBUG Calculated dispatch data
|
||||||
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
DEBUG Collected metrics data
|
||||||
|
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
||||||
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
||||||
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
|
||||||
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
|
||||||
DEBUG Calculated metric values
|
DEBUG Calculated metric values
|
||||||
DEBUG Calculated roofline data points
|
DEBUG Calculated roofline data points
|
||||||
DEBUG [analysis] generating analysis
|
DEBUG [analysis] generating analysis
|
||||||
DEBUG SQLite database initialized with name: test.db
|
DEBUG SQLite database initialized with name: test.db
|
||||||
DEBUG Initialized database: test.db
|
DEBUG Initialized database: test.db
|
||||||
DEBUG Completed writing database
|
INFO ed45b0b189
|
||||||
|
INFO ed45b0b189
|
||||||
|
DEBUG Completed writing database
|
||||||
|
WARNING Created file: test.db
|
||||||
|
|||||||
@@ -28,7 +28,9 @@ Launch the standalone GUI analyzer
|
|||||||
----------------------------------
|
----------------------------------
|
||||||
|
|
||||||
To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your
|
To launch the ROCm Compute Profiler GUI analyzer, include the ``--gui`` flag with your
|
||||||
desired analysis command. For example:
|
desired analysis command.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
.. code-block:: shell-session
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
|||||||
@@ -386,26 +386,24 @@ class OmniAnalyze_Base:
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Ensure analysis output does not overwrite existing files
|
# Ensure analysis output does not overwrite existing files
|
||||||
if not args.output_name:
|
if args.output_name:
|
||||||
return
|
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
|
||||||
|
console_error(
|
||||||
|
"analysis",
|
||||||
|
"Analysis output file/folder name must "
|
||||||
|
"contain only alphanumeric characters "
|
||||||
|
"or underscores (_), hyphens (-).",
|
||||||
|
)
|
||||||
|
|
||||||
if not re.match(r"^[A-Za-z0-9_-]+$", args.output_name):
|
path_to_check = args.output_name
|
||||||
console_error(
|
if args.output_format in ("txt", "db"):
|
||||||
"analysis",
|
path_to_check += f".{args.output_format}"
|
||||||
"Analysis output file/folder name must "
|
|
||||||
"contain only alphanumeric characters "
|
|
||||||
"or underscores (_), hyphens (-).",
|
|
||||||
)
|
|
||||||
|
|
||||||
path_to_check = args.output_name
|
if Path(path_to_check).exists():
|
||||||
if args.output_format in ("txt", "db"):
|
console_error(
|
||||||
path_to_check += f".{args.output_format}"
|
f"Analysis output file/folder {path_to_check} already exists. "
|
||||||
|
"Please choose a different name."
|
||||||
if Path(path_to_check).exists():
|
)
|
||||||
console_error(
|
|
||||||
f"Analysis output file/folder {path_to_check} already exists. "
|
|
||||||
"Please choose a different name."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check if any kernel's counters are missing due to iteration multiplexing
|
# Check if any kernel's counters are missing due to iteration multiplexing
|
||||||
if (
|
if (
|
||||||
|
|||||||
@@ -101,7 +101,9 @@ class db_analysis(OmniAnalyze_Base):
|
|||||||
Database.init(db_name)
|
Database.init(db_name)
|
||||||
console_debug(f"Initialized database: {db_name}")
|
console_debug(f"Initialized database: {db_name}")
|
||||||
|
|
||||||
|
# Iterate over all workloads
|
||||||
for workload_path in self._runs.keys():
|
for workload_path in self._runs.keys():
|
||||||
|
# Add workload
|
||||||
workload_obj = orm.Workload(
|
workload_obj = orm.Workload(
|
||||||
name=workload_path.split("/")[-2],
|
name=workload_path.split("/")[-2],
|
||||||
sub_name=workload_path.split("/")[-1],
|
sub_name=workload_path.split("/")[-1],
|
||||||
@@ -113,38 +115,9 @@ class db_analysis(OmniAnalyze_Base):
|
|||||||
)
|
)
|
||||||
Database.get_session().add(workload_obj)
|
Database.get_session().add(workload_obj)
|
||||||
|
|
||||||
for pc_sample in self._pc_sampling_data_per_workload.get(
|
# Add kernel
|
||||||
workload_path, pd.DataFrame()
|
|
||||||
).itertuples():
|
|
||||||
Database.get_session().add(
|
|
||||||
orm.PCsampling(
|
|
||||||
source=pc_sample.source_line,
|
|
||||||
instruction=pc_sample.instruction,
|
|
||||||
count=pc_sample.count,
|
|
||||||
kernel_name=pc_sample.kernel_name,
|
|
||||||
offset=pc_sample.offset,
|
|
||||||
count_issue=pc_sample.count_issued,
|
|
||||||
count_stall=pc_sample.count_stalled,
|
|
||||||
stall_reason=pc_sample.stall_reason,
|
|
||||||
workload=workload_obj,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
for roofline_data in self._roofline_data_per_workload.get(
|
|
||||||
workload_path, pd.DataFrame()
|
|
||||||
).itertuples():
|
|
||||||
Database.get_session().add(
|
|
||||||
orm.RooflineData(
|
|
||||||
kernel_name=roofline_data.kernel_name,
|
|
||||||
total_flops=roofline_data.total_flops,
|
|
||||||
l1_cache_data=roofline_data.l1_cache_data,
|
|
||||||
l2_cache_data=roofline_data.l2_cache_data,
|
|
||||||
hbm_cache_data=roofline_data.hbm_cache_data,
|
|
||||||
workload=workload_obj,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
kernel_objs: dict[str, orm.Kernel] = {}
|
kernel_objs: dict[str, orm.Kernel] = {}
|
||||||
|
|
||||||
for dispatch in self._dispatch_data_per_workload.get(
|
for dispatch in self._dispatch_data_per_workload.get(
|
||||||
workload_path, pd.DataFrame()
|
workload_path, pd.DataFrame()
|
||||||
).itertuples():
|
).itertuples():
|
||||||
@@ -167,44 +140,101 @@ class db_analysis(OmniAnalyze_Base):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Optimize: Pre-group values by (metric_id, kernel_name) for O(1) lookups
|
# Add roofline data points
|
||||||
values_df = self._values_data_per_workload.get(
|
for roofline_data in self._roofline_data_per_workload.get(
|
||||||
workload_path, pd.DataFrame()
|
|
||||||
)
|
|
||||||
values_grouped = {}
|
|
||||||
if not values_df.empty:
|
|
||||||
for value in values_df.itertuples():
|
|
||||||
key = (value.metric_id, value.kernel_name)
|
|
||||||
if key not in values_grouped:
|
|
||||||
values_grouped[key] = []
|
|
||||||
values_grouped[key].append(value)
|
|
||||||
|
|
||||||
for metric in self._metrics_info_data_per_workload.get(
|
|
||||||
workload_path, pd.DataFrame()
|
workload_path, pd.DataFrame()
|
||||||
).itertuples():
|
).itertuples():
|
||||||
for kernel_name in kernel_objs.keys():
|
if roofline_data.kernel_name not in kernel_objs:
|
||||||
metric_obj = orm.Metric(
|
console_warning(
|
||||||
name=metric.name,
|
f"Kernel {roofline_data.kernel_name} from roofline data "
|
||||||
metric_id=metric.metric_id,
|
"not found in dispatch data. Skipping roofline entry."
|
||||||
description=metric.description,
|
|
||||||
unit=metric.unit,
|
|
||||||
table_name=metric.table_name,
|
|
||||||
sub_table_name=metric.sub_table_name,
|
|
||||||
kernel=kernel_objs[kernel_name],
|
|
||||||
)
|
)
|
||||||
Database.get_session().add(metric_obj)
|
continue
|
||||||
|
Database.get_session().add(
|
||||||
|
orm.RooflineData(
|
||||||
|
total_flops=roofline_data.total_flops,
|
||||||
|
l1_cache_data=roofline_data.l1_cache_data,
|
||||||
|
l2_cache_data=roofline_data.l2_cache_data,
|
||||||
|
hbm_cache_data=roofline_data.hbm_cache_data,
|
||||||
|
kernel=kernel_objs[roofline_data.kernel_name],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Direct lookup instead of iterating through all values
|
# Add pc sampling data
|
||||||
key = (metric.metric_id, kernel_name)
|
for pc_sample in self._pc_sampling_data_per_workload.get(
|
||||||
for value in values_grouped.get(key, []):
|
workload_path, pd.DataFrame()
|
||||||
Database.get_session().add(
|
).itertuples():
|
||||||
orm.Value(
|
if pc_sample.kernel_name not in kernel_objs:
|
||||||
metric=metric_obj,
|
console_warning(
|
||||||
value_name=value.value_name,
|
f"Kernel {pc_sample.kernel_name} from PC sampling data "
|
||||||
value=value.value,
|
"not found in dispatch data. Skipping PC sampling entry."
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
|
Database.get_session().add(
|
||||||
|
orm.PCsampling(
|
||||||
|
source=pc_sample.source_line,
|
||||||
|
instruction=pc_sample.instruction,
|
||||||
|
count=pc_sample.count,
|
||||||
|
offset=pc_sample.offset,
|
||||||
|
count_issue=pc_sample.count_issued,
|
||||||
|
count_stall=pc_sample.count_stalled,
|
||||||
|
stall_reason=pc_sample.stall_reason,
|
||||||
|
kernel=kernel_objs[pc_sample.kernel_name],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add metrics and values - iterate on values, create metrics as needed
|
||||||
|
metrics_info_dict = {
|
||||||
|
row.metric_id: row
|
||||||
|
for row in self._metrics_info_data_per_workload.get(
|
||||||
|
workload_path, pd.DataFrame()
|
||||||
|
).itertuples()
|
||||||
|
}
|
||||||
|
metric_objs: dict[str, orm.MetricDefinition] = {}
|
||||||
|
|
||||||
|
for value in self._values_data_per_workload.get(
|
||||||
|
workload_path, pd.DataFrame()
|
||||||
|
).itertuples():
|
||||||
|
# Check if kernel exists
|
||||||
|
if value.kernel_name not in kernel_objs:
|
||||||
|
console_warning(
|
||||||
|
f"Kernel {value.kernel_name} from values data "
|
||||||
|
"not found in dispatch data. Skipping metric value."
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Create or reuse metric object
|
||||||
|
if value.metric_id not in metric_objs:
|
||||||
|
# Fetch metric info
|
||||||
|
if value.metric_id not in metrics_info_dict:
|
||||||
|
console_warning(
|
||||||
|
f"Metric {value.metric_id} from values data "
|
||||||
|
"not found in metrics info. Skipping metric value."
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
|
metric_info = metrics_info_dict[value.metric_id]
|
||||||
|
metric_objs[value.metric_id] = orm.MetricDefinition(
|
||||||
|
name=metric_info.name,
|
||||||
|
metric_id=metric_info.metric_id,
|
||||||
|
description=metric_info.description,
|
||||||
|
unit=metric_info.unit,
|
||||||
|
table_name=metric_info.table_name,
|
||||||
|
sub_table_name=metric_info.sub_table_name,
|
||||||
|
workload=workload_obj,
|
||||||
|
)
|
||||||
|
Database.get_session().add(metric_objs[value.metric_id])
|
||||||
|
|
||||||
|
# Add value
|
||||||
|
Database.get_session().add(
|
||||||
|
orm.MetricValue(
|
||||||
|
metric=metric_objs[value.metric_id],
|
||||||
|
kernel=kernel_objs[value.kernel_name],
|
||||||
|
value_name=value.value_name,
|
||||||
|
value=value.value,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add metadata
|
||||||
version = get_version(rocprof_compute_home)
|
version = get_version(rocprof_compute_home)
|
||||||
Database.get_session().add(
|
Database.get_session().add(
|
||||||
orm.Metadata(
|
orm.Metadata(
|
||||||
|
|||||||
@@ -51,7 +51,6 @@ class webui_analysis(OmniAnalyze_Base):
|
|||||||
self.app = dash.Dash(
|
self.app = dash.Dash(
|
||||||
__name__, title=PROJECT_NAME, external_stylesheets=[dbc.themes.CYBORG]
|
__name__, title=PROJECT_NAME, external_stylesheets=[dbc.themes.CYBORG]
|
||||||
)
|
)
|
||||||
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
|
|
||||||
self.arch: Optional[str] = None
|
self.arch: Optional[str] = None
|
||||||
|
|
||||||
self.__hidden_sections = ["Memory Chart"]
|
self.__hidden_sections = ["Memory Chart"]
|
||||||
@@ -90,6 +89,7 @@ class webui_analysis(OmniAnalyze_Base):
|
|||||||
kernel_top_df = base_data.dfs[1]
|
kernel_top_df = base_data.dfs[1]
|
||||||
for kernel_id in base_data.filter_kernel_ids:
|
for kernel_id in base_data.filter_kernel_ids:
|
||||||
filt_kernel_names.append(str(kernel_top_df.loc[kernel_id, "Kernel_Name"]))
|
filt_kernel_names.append(str(kernel_top_df.loc[kernel_id, "Kernel_Name"]))
|
||||||
|
input_filters["kernel"] = filt_kernel_names
|
||||||
|
|
||||||
# setup app layout
|
# setup app layout
|
||||||
from utils.gui_components.header import get_header
|
from utils.gui_components.header import get_header
|
||||||
@@ -338,6 +338,7 @@ class webui_analysis(OmniAnalyze_Base):
|
|||||||
)
|
)
|
||||||
|
|
||||||
args = self.get_args()
|
args = self.get_args()
|
||||||
|
self.dest_dir = str(Path(args.path[0][0]).absolute().resolve())
|
||||||
|
|
||||||
# create 'mega dataframe'
|
# create 'mega dataframe'
|
||||||
self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc(
|
self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc(
|
||||||
|
|||||||
@@ -531,12 +531,15 @@ class RocProfCompute_Base:
|
|||||||
and not args.attach_pid
|
and not args.attach_pid
|
||||||
):
|
):
|
||||||
# Use native counter collection tool
|
# Use native counter collection tool
|
||||||
|
# Use lib* glob pattern to handle CMAKE_INSTALL_LIBDIR variations
|
||||||
|
# (lib, lib64, lib32, etc. depending on distribution)
|
||||||
|
native_tool_base_path = Path(sys.argv[0]).resolve().parents[2]
|
||||||
|
native_tool_glob_pattern = (
|
||||||
|
"lib*/rocprofiler-compute/librocprofiler-compute-tool.so"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
native_tool_path = str(
|
native_tool_path = str(
|
||||||
Path(sys.argv[0]).resolve().parents[2]
|
next(native_tool_base_path.glob(native_tool_glob_pattern))
|
||||||
/ "lib"
|
|
||||||
/ "rocprofiler-compute"
|
|
||||||
/ "librocprofiler-compute-tool.so"
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
console_debug(
|
console_debug(
|
||||||
@@ -552,6 +555,7 @@ class RocProfCompute_Base:
|
|||||||
)
|
)
|
||||||
/ "librocprofiler-compute-tool.so"
|
/ "librocprofiler-compute-tool.so"
|
||||||
)
|
)
|
||||||
|
native_tool_cpp_path = Path(__file__).resolve().parents[1] / "lib"
|
||||||
link_libraries = ("rocprofiler-sdk",)
|
link_libraries = ("rocprofiler-sdk",)
|
||||||
build_command = (
|
build_command = (
|
||||||
# Create shared object
|
# Create shared object
|
||||||
@@ -564,10 +568,10 @@ class RocProfCompute_Base:
|
|||||||
# rocprofiler sdk library path
|
# rocprofiler sdk library path
|
||||||
f"-L {str(Path(args.rocprofiler_sdk_tool_path).parent.parent)} "
|
f"-L {str(Path(args.rocprofiler_sdk_tool_path).parent.parent)} "
|
||||||
# native tool source files (tool.cpp and helper.cpp)
|
# native tool source files (tool.cpp and helper.cpp)
|
||||||
f"{str(Path(__file__).parent.parent)}/"
|
f"{native_tool_cpp_path}/"
|
||||||
"lib/rocprofiler_compute_tool.cpp "
|
"rocprofiler_compute_tool.cpp "
|
||||||
f"{str(Path(__file__).parent.parent)}/"
|
f"{native_tool_cpp_path}/"
|
||||||
"lib/helper.cpp "
|
"helper.cpp "
|
||||||
# temporary shared object for native tool
|
# temporary shared object for native tool
|
||||||
f"-o {native_tool_path}"
|
f"-o {native_tool_path}"
|
||||||
)
|
)
|
||||||
@@ -575,7 +579,15 @@ class RocProfCompute_Base:
|
|||||||
success, output = capture_subprocess_output(shlex.split(build_command))
|
success, output = capture_subprocess_output(shlex.split(build_command))
|
||||||
console_debug(f"Build output: {output}")
|
console_debug(f"Build output: {output}")
|
||||||
if not success:
|
if not success:
|
||||||
console_error("Failed to build native counter collection tool.")
|
console_error(
|
||||||
|
"Failed to use native counter collection tool.\n"
|
||||||
|
"Could not find pre-built .so file at: "
|
||||||
|
f"{native_tool_base_path / native_tool_glob_pattern}\n"
|
||||||
|
"Could not find source .cpp files in folder: "
|
||||||
|
f"{native_tool_cpp_path}\n"
|
||||||
|
"Please ensure the native tool library is installed "
|
||||||
|
"or source files are present."
|
||||||
|
)
|
||||||
|
|
||||||
if self.__profiler == "rocprofiler-sdk":
|
if self.__profiler == "rocprofiler-sdk":
|
||||||
options = self.get_profiler_options(native_tool_path=native_tool_path)
|
options = self.get_profiler_options(native_tool_path=native_tool_path)
|
||||||
|
|||||||
@@ -92,7 +92,6 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
|
|||||||
/ "librocprofiler-sdk-rocattach.so"
|
/ "librocprofiler-sdk-rocattach.so"
|
||||||
)
|
)
|
||||||
options.update({
|
options.update({
|
||||||
"ROCPROF_ATTACH_TOOL_LIBRARY": rocprofiler_sdk_tool_path,
|
|
||||||
"ROCPROF_ATTACH_LIBRARY": rocprofiler_attach_library_path,
|
"ROCPROF_ATTACH_LIBRARY": rocprofiler_attach_library_path,
|
||||||
"ROCPROF_ATTACH_PID": args.attach_pid,
|
"ROCPROF_ATTACH_PID": args.attach_pid,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ from sqlalchemy.sql import Select
|
|||||||
from utils.logger import console_debug, console_error
|
from utils.logger import console_debug, console_error
|
||||||
|
|
||||||
PREFIX = "compute_"
|
PREFIX = "compute_"
|
||||||
SCHEMA_VERSION = "1.1.0"
|
SCHEMA_VERSION = "1.2.0"
|
||||||
|
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
@@ -63,18 +63,16 @@ class Workload(Base):
|
|||||||
|
|
||||||
# Workload can have multiple kernels
|
# Workload can have multiple kernels
|
||||||
kernels = relationship("Kernel", back_populates="workload")
|
kernels = relationship("Kernel", back_populates="workload")
|
||||||
# Workload can have multiple roofline data points
|
# Workload can have multiple metric definitions
|
||||||
roofline_data_points = relationship("RooflineData", back_populates="workload")
|
metric_definitions = relationship("MetricDefinition", back_populates="workload")
|
||||||
# Workload can have multiple pc_sampling values
|
|
||||||
pc_sampling_values = relationship("PCsampling", back_populates="workload")
|
|
||||||
|
|
||||||
|
|
||||||
class Metric(Base):
|
class MetricDefinition(Base):
|
||||||
__tablename__ = f"{PREFIX}metric"
|
__tablename__ = f"{PREFIX}metric_definition"
|
||||||
|
|
||||||
metric_uuid = Column(Integer, primary_key=True)
|
metric_uuid = Column(Integer, primary_key=True)
|
||||||
kernel_uuid = Column(
|
workload_id = Column(
|
||||||
Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False
|
Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False
|
||||||
)
|
)
|
||||||
name = Column(String) # e.g. Wavefronts Num
|
name = Column(String) # e.g. Wavefronts Num
|
||||||
metric_id = Column(String) # e.g. 4.1.3
|
metric_id = Column(String) # e.g. 4.1.3
|
||||||
@@ -83,27 +81,26 @@ class Metric(Base):
|
|||||||
sub_table_name = Column(String) # e.g. Wavefront stats
|
sub_table_name = Column(String) # e.g. Wavefront stats
|
||||||
unit = Column(String) # e.g. Gbps
|
unit = Column(String) # e.g. Gbps
|
||||||
|
|
||||||
# Metric can have one kernel
|
# Metric can have one workload
|
||||||
kernel = relationship("Kernel", back_populates="metrics")
|
workload = relationship("Workload", back_populates="metric_definitions")
|
||||||
# Metric can have multiple values
|
# Metric can have multiple metric values
|
||||||
values = relationship("Value", back_populates="metric")
|
metric_values = relationship("MetricValue", back_populates="metric")
|
||||||
|
|
||||||
|
|
||||||
class RooflineData(Base):
|
class RooflineData(Base):
|
||||||
__tablename__ = f"{PREFIX}roofline_data"
|
__tablename__ = f"{PREFIX}roofline_data"
|
||||||
|
|
||||||
roofline_uuid = Column(Integer, primary_key=True)
|
roofline_uuid = Column(Integer, primary_key=True)
|
||||||
workload_id = Column(
|
kernel_uuid = Column(
|
||||||
Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False
|
Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False
|
||||||
)
|
)
|
||||||
kernel_name = Column(String)
|
|
||||||
total_flops = Column(Float)
|
total_flops = Column(Float)
|
||||||
l1_cache_data = Column(Float)
|
l1_cache_data = Column(Float)
|
||||||
l2_cache_data = Column(Float)
|
l2_cache_data = Column(Float)
|
||||||
hbm_cache_data = Column(Float)
|
hbm_cache_data = Column(Float)
|
||||||
|
|
||||||
# Roofline data point can have one workload
|
# Roofline data point can have one kernel
|
||||||
workload = relationship("Workload", back_populates="roofline_data_points")
|
kernel = relationship("Kernel", back_populates="roofline_data_points")
|
||||||
|
|
||||||
|
|
||||||
class Dispatch(Base):
|
class Dispatch(Base):
|
||||||
@@ -135,42 +132,50 @@ class Kernel(Base):
|
|||||||
workload = relationship("Workload", back_populates="kernels")
|
workload = relationship("Workload", back_populates="kernels")
|
||||||
# Kernel can have multiple dispatches
|
# Kernel can have multiple dispatches
|
||||||
dispatches = relationship("Dispatch", back_populates="kernel")
|
dispatches = relationship("Dispatch", back_populates="kernel")
|
||||||
# Kernel can have multiple metrics
|
# Kernel can have multiple metric values
|
||||||
metrics = relationship("Metric", back_populates="kernel")
|
metric_values = relationship("MetricValue", back_populates="kernel")
|
||||||
|
# Kernel can have multiple roofline data points
|
||||||
|
roofline_data_points = relationship("RooflineData", back_populates="kernel")
|
||||||
|
# Kernel can have multiple pc_sampling values
|
||||||
|
pc_sampling_values = relationship("PCsampling", back_populates="kernel")
|
||||||
|
|
||||||
|
|
||||||
class PCsampling(Base):
|
class PCsampling(Base):
|
||||||
__tablename__ = f"{PREFIX}pcsampling"
|
__tablename__ = f"{PREFIX}pcsampling"
|
||||||
|
|
||||||
pc_sampling_uuid = Column(Integer, primary_key=True)
|
pc_sampling_uuid = Column(Integer, primary_key=True)
|
||||||
workload_id = Column(
|
kernel_uuid = Column(
|
||||||
Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False
|
Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False
|
||||||
)
|
)
|
||||||
source = Column(String)
|
source = Column(String)
|
||||||
instruction = Column(String)
|
instruction = Column(String)
|
||||||
count = Column(Integer)
|
count = Column(Integer)
|
||||||
kernel_name = Column(String)
|
|
||||||
offset = Column(Integer)
|
offset = Column(Integer)
|
||||||
count_issue = Column(Integer)
|
count_issue = Column(Integer)
|
||||||
count_stall = Column(Integer)
|
count_stall = Column(Integer)
|
||||||
stall_reason = Column(JSON)
|
stall_reason = Column(JSON)
|
||||||
|
|
||||||
# PCsampling can have one workload
|
# PCsampling can have one kernel
|
||||||
workload = relationship("Workload", back_populates="pc_sampling_values")
|
kernel = relationship("Kernel", back_populates="pc_sampling_values")
|
||||||
|
|
||||||
|
|
||||||
class Value(Base):
|
class MetricValue(Base):
|
||||||
__tablename__ = f"{PREFIX}value"
|
__tablename__ = f"{PREFIX}metric_value"
|
||||||
|
|
||||||
value_uuid = Column(Integer, primary_key=True)
|
value_uuid = Column(Integer, primary_key=True)
|
||||||
metric_uuid = Column(
|
metric_uuid = Column(
|
||||||
Integer, ForeignKey(f"{PREFIX}metric.metric_uuid"), nullable=False
|
Integer, ForeignKey(f"{PREFIX}metric_definition.metric_uuid"), nullable=False
|
||||||
|
)
|
||||||
|
kernel_uuid = Column(
|
||||||
|
Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False
|
||||||
)
|
)
|
||||||
value_name = Column(String) # e.g. min, max, avg
|
value_name = Column(String) # e.g. min, max, avg
|
||||||
value = Column(Float) # e.g. 123.45
|
value = Column(Float) # e.g. 123.45
|
||||||
|
|
||||||
# Value can have one metric
|
# Value can have one metric
|
||||||
metric = relationship("Metric", back_populates="values")
|
metric = relationship("MetricDefinition", back_populates="metric_values")
|
||||||
|
# Value can have one kernel
|
||||||
|
kernel = relationship("Kernel", back_populates="metric_values")
|
||||||
|
|
||||||
|
|
||||||
class Metadata(Base):
|
class Metadata(Base):
|
||||||
@@ -250,11 +255,20 @@ def get_views() -> list[TextClause]:
|
|||||||
|
|
||||||
views: dict[str, Select[Any]] = {
|
views: dict[str, Select[Any]] = {
|
||||||
"kernel_view": select(
|
"kernel_view": select(
|
||||||
|
Kernel.kernel_uuid.label("kernel_uuid"),
|
||||||
|
Kernel.workload_id.label("workload_id"),
|
||||||
|
Workload.name.label("workload_name"),
|
||||||
Kernel.kernel_name,
|
Kernel.kernel_name,
|
||||||
func.count(Dispatch.dispatch_id).label("dispatch_count"),
|
func.count(Dispatch.dispatch_id).label("dispatch_count"),
|
||||||
func.sum(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
func.sum(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
||||||
"duration_ns_sum"
|
"duration_ns_sum"
|
||||||
),
|
),
|
||||||
|
func.min(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
||||||
|
"duration_ns_min"
|
||||||
|
),
|
||||||
|
func.max(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
||||||
|
"duration_ns_max"
|
||||||
|
),
|
||||||
median_calc.c.duration_ns_median,
|
median_calc.c.duration_ns_median,
|
||||||
func.avg(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
func.avg(Dispatch.end_timestamp - Dispatch.start_timestamp).label(
|
||||||
"duration_ns_mean"
|
"duration_ns_mean"
|
||||||
@@ -262,24 +276,31 @@ def get_views() -> list[TextClause]:
|
|||||||
)
|
)
|
||||||
.select_from(Dispatch)
|
.select_from(Dispatch)
|
||||||
.join(Kernel, Dispatch.kernel_uuid == Kernel.kernel_uuid)
|
.join(Kernel, Dispatch.kernel_uuid == Kernel.kernel_uuid)
|
||||||
|
.join(Workload, Kernel.workload_id == Workload.workload_id)
|
||||||
.join(median_calc.subquery(), Kernel.kernel_name == median_calc.c.kernel_name)
|
.join(median_calc.subquery(), Kernel.kernel_name == median_calc.c.kernel_name)
|
||||||
.group_by(Kernel.kernel_name),
|
.group_by(
|
||||||
|
Kernel.kernel_uuid, Kernel.workload_id, Workload.name, Kernel.kernel_name
|
||||||
|
),
|
||||||
"metric_view": select(
|
"metric_view": select(
|
||||||
|
Workload.workload_id.label("workload_id"),
|
||||||
Workload.name.label("workload_name"),
|
Workload.name.label("workload_name"),
|
||||||
|
Kernel.kernel_uuid.label("kernel_uuid"),
|
||||||
Kernel.kernel_name,
|
Kernel.kernel_name,
|
||||||
Metric.name.label("metric_name"),
|
MetricDefinition.metric_uuid.label("metric_uuid"),
|
||||||
Metric.metric_id,
|
MetricDefinition.name.label("metric_name"),
|
||||||
Metric.description,
|
MetricDefinition.metric_id,
|
||||||
Metric.table_name,
|
MetricDefinition.description,
|
||||||
Metric.sub_table_name,
|
MetricDefinition.table_name,
|
||||||
Metric.unit,
|
MetricDefinition.sub_table_name,
|
||||||
Value.value_name,
|
MetricDefinition.unit,
|
||||||
Value.value,
|
MetricValue.value_uuid.label("value_uuid"),
|
||||||
|
MetricValue.value_name,
|
||||||
|
MetricValue.value,
|
||||||
)
|
)
|
||||||
.select_from(Metric)
|
.select_from(MetricDefinition)
|
||||||
.join(Kernel, Metric.kernel_uuid == Kernel.kernel_uuid)
|
.join(Workload, MetricDefinition.workload_id == Workload.workload_id)
|
||||||
.join(Value, Metric.metric_uuid == Value.metric_uuid)
|
.join(MetricValue, MetricDefinition.metric_uuid == MetricValue.metric_uuid)
|
||||||
.join(Workload, Kernel.workload_id == Workload.workload_id),
|
.join(Kernel, MetricValue.kernel_uuid == Kernel.kernel_uuid),
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ def multi_bar_chart(
|
|||||||
|
|
||||||
def create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar:
|
def create_instruction_mix_bar_chart(display_df: pd.DataFrame, df_unit: str) -> px.bar:
|
||||||
display_df = display_df.copy()
|
display_df = display_df.copy()
|
||||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
|
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
|
||||||
|
|
||||||
return px.bar(
|
return px.bar(
|
||||||
display_df,
|
display_df,
|
||||||
@@ -78,7 +78,7 @@ def create_multi_bar_charts(
|
|||||||
display_df: pd.DataFrame, table_id: int, df_unit: str
|
display_df: pd.DataFrame, table_id: int, df_unit: str
|
||||||
) -> list[px.bar]:
|
) -> list[px.bar]:
|
||||||
display_df = display_df.copy()
|
display_df = display_df.copy()
|
||||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "" else 0)
|
display_df["Avg"] = display_df["Avg"].apply(lambda x: int(x) if x != "N/A" else 0)
|
||||||
|
|
||||||
nested_bar = multi_bar_chart(table_id, display_df)
|
nested_bar = multi_bar_chart(table_id, display_df)
|
||||||
charts = []
|
charts = []
|
||||||
@@ -103,7 +103,9 @@ def create_multi_bar_charts(
|
|||||||
|
|
||||||
def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
|
def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
|
||||||
display_df = display_df.copy()
|
display_df = display_df.copy()
|
||||||
display_df["Avg"] = display_df["Avg"].apply(lambda x: float(x) if x != "" else 0.0)
|
display_df["Avg"] = display_df["Avg"].apply(
|
||||||
|
lambda x: float(x) if x != "N/A" else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
charts = []
|
charts = []
|
||||||
|
|
||||||
@@ -144,7 +146,7 @@ def create_sol_charts(display_df: pd.DataFrame, table_id: int) -> list[px.bar]:
|
|||||||
elif table_id == 1101:
|
elif table_id == 1101:
|
||||||
# Special formatting reference 'Pct of Peak' value
|
# Special formatting reference 'Pct of Peak' value
|
||||||
display_df["Pct of Peak"] = display_df["Pct of Peak"].apply(
|
display_df["Pct of Peak"] = display_df["Pct of Peak"].apply(
|
||||||
lambda x: float(x) if x != "" else 0.0
|
lambda x: float(x) if x != "N/A" else 0.0
|
||||||
)
|
)
|
||||||
charts.append(
|
charts.append(
|
||||||
px.bar(
|
px.bar(
|
||||||
|
|||||||
@@ -1290,6 +1290,8 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
|
|||||||
# NB: support ignoring the 1st n dispatched execution by '> n'
|
# NB: support ignoring the 1st n dispatched execution by '> n'
|
||||||
# The better way may be parsing python slice string
|
# The better way may be parsing python slice string
|
||||||
for dispatch_id in workload.filter_dispatch_ids:
|
for dispatch_id in workload.filter_dispatch_ids:
|
||||||
|
if isinstance(dispatch_id, str) and ">" in dispatch_id:
|
||||||
|
dispatch_id = re.match(r"\>\s*(\d+)", dispatch_id).group(1)
|
||||||
if int(dispatch_id) >= len(df): # subtract 2 bc of the two header rows
|
if int(dispatch_id) >= len(df): # subtract 2 bc of the two header rows
|
||||||
console_error("analysis", f"{dispatch_id} is an invalid dispatch id.")
|
console_error("analysis", f"{dispatch_id} is an invalid dispatch id.")
|
||||||
|
|
||||||
@@ -1297,7 +1299,7 @@ def apply_dispatch_filter(df: pd.DataFrame, workload: schema.Workload) -> pd.Dat
|
|||||||
isinstance(workload.filter_dispatch_ids[0], str)
|
isinstance(workload.filter_dispatch_ids[0], str)
|
||||||
and ">" in workload.filter_dispatch_ids[0]
|
and ">" in workload.filter_dispatch_ids[0]
|
||||||
):
|
):
|
||||||
dispatch_match = re.match(r"\> (\d+)", workload.filter_dispatch_ids[0])
|
dispatch_match = re.match(r"\>\s*(\d+)", workload.filter_dispatch_ids[0])
|
||||||
df = df[
|
df = df[
|
||||||
df[schema.PMC_PERF_FILE_PREFIX]["Dispatch_ID"]
|
df[schema.PMC_PERF_FILE_PREFIX]["Dispatch_ID"]
|
||||||
> int(dispatch_match.group(1))
|
> int(dispatch_match.group(1))
|
||||||
|
|||||||
@@ -174,15 +174,15 @@ def generate_machine_specs(
|
|||||||
##########################################
|
##########################################
|
||||||
machine_info = extract_machine_info()
|
machine_info = extract_machine_info()
|
||||||
|
|
||||||
# FIXME: use device
|
|
||||||
# Load amd-smi data
|
|
||||||
gpu_info = extract_gpu_info()
|
|
||||||
|
|
||||||
##########################################
|
##########################################
|
||||||
## B. SoC Specs
|
## B. SoC Specs
|
||||||
##########################################
|
##########################################
|
||||||
soc_info = extract_soc_info()
|
soc_info = extract_soc_info()
|
||||||
|
|
||||||
|
# FIXME: use device
|
||||||
|
# Load amd-smi data
|
||||||
|
gpu_info = extract_gpu_info(gpu_arch=soc_info["gpu_arch"])
|
||||||
|
|
||||||
# Combine all specifications
|
# Combine all specifications
|
||||||
with amdsmi_ctx():
|
with amdsmi_ctx():
|
||||||
specs = MachineSpecs(
|
specs = MachineSpecs(
|
||||||
@@ -269,7 +269,16 @@ def extract_machine_info() -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
@demarcate
|
@demarcate
|
||||||
def extract_gpu_info() -> dict[str, Any]:
|
def extract_gpu_info(gpu_arch: Optional[str]) -> dict[str, Any]:
|
||||||
|
# Partition is only supported on >= MI 300 series
|
||||||
|
# (gpu_arch should be gfx940 or higher for MI300+)
|
||||||
|
is_partition_supported = False
|
||||||
|
if gpu_arch and gpu_arch.startswith("gfx") and len(gpu_arch) >= 6:
|
||||||
|
try:
|
||||||
|
is_partition_supported = int(gpu_arch[3:6], 16) >= 0x940
|
||||||
|
except ValueError:
|
||||||
|
pass # Invalid hex string, keep is_partition_supported as False
|
||||||
|
|
||||||
result: dict[str, Optional[str]] = {
|
result: dict[str, Optional[str]] = {
|
||||||
"vbios": None,
|
"vbios": None,
|
||||||
"compute_partition": None,
|
"compute_partition": None,
|
||||||
@@ -278,17 +287,22 @@ def extract_gpu_info() -> dict[str, Any]:
|
|||||||
|
|
||||||
with amdsmi_ctx():
|
with amdsmi_ctx():
|
||||||
result["vbios"] = get_gpu_vbios_part_number()
|
result["vbios"] = get_gpu_vbios_part_number()
|
||||||
result["compute_partition"] = get_gpu_compute_partition()
|
if is_partition_supported:
|
||||||
result["memory_partition"] = get_gpu_memory_partition()
|
result["compute_partition"] = get_gpu_compute_partition()
|
||||||
|
result["memory_partition"] = get_gpu_memory_partition()
|
||||||
|
else:
|
||||||
|
result["compute_partition"] = "N/A"
|
||||||
|
result["memory_partition"] = "N/A"
|
||||||
|
|
||||||
# Apply defaults and warnings
|
# Apply defaults and warnings
|
||||||
if result["compute_partition"] == "N/A" or not result["compute_partition"]:
|
if is_partition_supported:
|
||||||
console_warning("Cannot detect accelerator partition from amd-smi.")
|
if result["compute_partition"] == "N/A" or not result["compute_partition"]:
|
||||||
console_warning("Applying default accelerator partition: SPX")
|
console_warning("Cannot detect accelerator partition from amd-smi.")
|
||||||
result["compute_partition"] = "SPX"
|
console_warning("Applying default accelerator partition: SPX")
|
||||||
|
result["compute_partition"] = "SPX"
|
||||||
|
|
||||||
if result["memory_partition"] == "N/A" or not result["memory_partition"]:
|
if result["memory_partition"] == "N/A" or not result["memory_partition"]:
|
||||||
console_warning("Cannot detect memory partition from amd-smi.")
|
console_warning("Cannot detect memory partition from amd-smi.")
|
||||||
|
|
||||||
console_debug(
|
console_debug(
|
||||||
f"vbios is {result['vbios']}, compute partition is "
|
f"vbios is {result['vbios']}, compute partition is "
|
||||||
|
|||||||
@@ -235,6 +235,101 @@ def detect_rocprof(args: argparse.Namespace) -> str:
|
|||||||
return rocprof_cmd
|
return rocprof_cmd
|
||||||
|
|
||||||
|
|
||||||
|
def perform_attach_detach(new_env: dict[str, str], options: dict[str, Any]) -> None:
|
||||||
|
@contextmanager
|
||||||
|
def temporary_env(env_vars: dict[str, str]) -> Generator[None, None, None]:
|
||||||
|
"""
|
||||||
|
Temporarily change the environment variable of this application.
|
||||||
|
"""
|
||||||
|
original_env = os.environ.copy()
|
||||||
|
os.environ.update({k: str(v) for k, v in env_vars.items()})
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
os.environ.clear()
|
||||||
|
os.environ.update(original_env)
|
||||||
|
|
||||||
|
with temporary_env(new_env):
|
||||||
|
libname = options["ROCPROF_ATTACH_LIBRARY"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
c_lib = ctypes.CDLL(libname)
|
||||||
|
if c_lib is None:
|
||||||
|
console_error(f"Error opening {libname}")
|
||||||
|
except Exception as e:
|
||||||
|
console_error(f"Error loading {libname}: {e}")
|
||||||
|
|
||||||
|
# Set argument and return types for attach/detach functions
|
||||||
|
try:
|
||||||
|
# old attach/detach API
|
||||||
|
c_lib.attach.argtypes = [ctypes.c_uint]
|
||||||
|
except Exception as e:
|
||||||
|
console_debug(
|
||||||
|
"Error setting old attach/detach API argument "
|
||||||
|
f"types: {e}, trying new API"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
# new attach/detach API
|
||||||
|
c_lib.rocattach_attach.restype = ctypes.c_int
|
||||||
|
c_lib.rocattach_attach.argtypes = [ctypes.c_int]
|
||||||
|
c_lib.rocattach_detach.restype = ctypes.c_int
|
||||||
|
c_lib.rocattach_detach.argtypes = [ctypes.c_int]
|
||||||
|
except Exception as e:
|
||||||
|
console_error(
|
||||||
|
f"Error setting attach/detach function argument types: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
|
pid = options["ROCPROF_ATTACH_PID"]
|
||||||
|
if pid is None:
|
||||||
|
console_error("Mode of attach/detach must have setup for process ID")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# old attach/detach API
|
||||||
|
c_lib.attach(int(pid))
|
||||||
|
except Exception as e:
|
||||||
|
console_debug(f"Error attaching with old API: {e}, trying new API")
|
||||||
|
try:
|
||||||
|
# new attach/detach API
|
||||||
|
attach_status = c_lib.rocattach_attach(int(pid))
|
||||||
|
if attach_status != 0:
|
||||||
|
console_error(
|
||||||
|
f"Error attaching to process {pid}, "
|
||||||
|
f"rocattach_attach returned {attach_status}"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
console_error(f"Error attaching to process {pid}: {e}")
|
||||||
|
|
||||||
|
duration = os.environ.get("ROCPROF_ATTACH_DURATION", None)
|
||||||
|
if duration is None:
|
||||||
|
console_log(
|
||||||
|
f"\033[93mAttach to process with ID {pid} is successful, "
|
||||||
|
"Press Enter to detach...\033[0m"
|
||||||
|
)
|
||||||
|
input()
|
||||||
|
else:
|
||||||
|
console_log(
|
||||||
|
f"\033[93mAttach to process with ID {pid} is successful, "
|
||||||
|
f"detach will happen in {duration} milliseconds...\033[0m"
|
||||||
|
)
|
||||||
|
time.sleep(int(duration) / 1000)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# old attach/detach API
|
||||||
|
c_lib.detach(int(pid))
|
||||||
|
except Exception as e:
|
||||||
|
console_debug(f"Error detaching with old API: {e}, trying new API")
|
||||||
|
try:
|
||||||
|
# new attach/detach API
|
||||||
|
detach_status = c_lib.rocattach_detach(int(pid))
|
||||||
|
if detach_status != 0:
|
||||||
|
console_error(
|
||||||
|
f"Error detaching from process {pid}, "
|
||||||
|
f"rocattach_detach returned {detach_status}"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
console_error(f"Error detaching from process {pid}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def capture_subprocess_output(
|
def capture_subprocess_output(
|
||||||
subprocess_args: list[str],
|
subprocess_args: list[str],
|
||||||
new_env: Optional[dict[str, str]] = None,
|
new_env: Optional[dict[str, str]] = None,
|
||||||
@@ -788,49 +883,7 @@ def run_prof(
|
|||||||
console_debug(f"rocprof sdk env vars: {new_env}")
|
console_debug(f"rocprof sdk env vars: {new_env}")
|
||||||
|
|
||||||
if is_mode_live_attach:
|
if is_mode_live_attach:
|
||||||
|
perform_attach_detach(new_env, options)
|
||||||
@contextmanager
|
|
||||||
def temporary_env(env_vars: dict[str, str]) -> Generator[None, None, None]:
|
|
||||||
"""
|
|
||||||
Temporarily change the environment variable of this application.
|
|
||||||
"""
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
os.environ.update({k: str(v) for k, v in env_vars.items()})
|
|
||||||
try:
|
|
||||||
yield
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
|
|
||||||
with temporary_env(new_env):
|
|
||||||
libname = options["ROCPROF_ATTACH_LIBRARY"]
|
|
||||||
c_lib = ctypes.CDLL(libname)
|
|
||||||
if c_lib is None:
|
|
||||||
console_error(f"Error opening {libname}")
|
|
||||||
c_lib.attach.argtypes = [ctypes.c_uint]
|
|
||||||
|
|
||||||
pid = options["ROCPROF_ATTACH_PID"]
|
|
||||||
if pid is None:
|
|
||||||
console_error(
|
|
||||||
"Mode of attach/detach must have setup for process ID"
|
|
||||||
)
|
|
||||||
|
|
||||||
c_lib.attach(int(pid))
|
|
||||||
duration = os.environ.get("ROCPROF_ATTACH_DURATION", None)
|
|
||||||
if duration is None:
|
|
||||||
console_log(
|
|
||||||
f"\033[93mAttach to process with ID {pid} is successful, "
|
|
||||||
"Press Enter to detach...\033[0m"
|
|
||||||
)
|
|
||||||
input()
|
|
||||||
else:
|
|
||||||
console_log(
|
|
||||||
f"\033[93mAttach to process with ID {pid} is successful, "
|
|
||||||
f"detach will happen in {duration} milliseconds...\033[0m"
|
|
||||||
)
|
|
||||||
time.sleep(int(duration) / 1000)
|
|
||||||
c_lib.detach(int(pid))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if app_cmd is None:
|
if app_cmd is None:
|
||||||
console_error(
|
console_error(
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ import re
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -989,19 +990,19 @@ def test_analyze_rocpd(
|
|||||||
Dispatch,
|
Dispatch,
|
||||||
Kernel,
|
Kernel,
|
||||||
Metadata,
|
Metadata,
|
||||||
Metric,
|
MetricDefinition,
|
||||||
|
MetricValue,
|
||||||
RooflineData,
|
RooflineData,
|
||||||
Value,
|
|
||||||
Workload,
|
Workload,
|
||||||
)
|
)
|
||||||
|
|
||||||
table_name_map = {
|
table_name_map = {
|
||||||
"compute_workload": Workload,
|
"compute_workload": Workload,
|
||||||
"compute_metric": Metric,
|
"compute_metric_definition": MetricDefinition,
|
||||||
"compute_roofline_data": RooflineData,
|
"compute_roofline_data": RooflineData,
|
||||||
"compute_dispatch": Dispatch,
|
"compute_dispatch": Dispatch,
|
||||||
"compute_kernel": Kernel,
|
"compute_kernel": Kernel,
|
||||||
"compute_value": Value,
|
"compute_metric_value": MetricValue,
|
||||||
"compute_metadata": Metadata,
|
"compute_metadata": Metadata,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2268,6 +2269,7 @@ def test_live_attach_detach_block(binary_handler_profile_rocprof_compute):
|
|||||||
try:
|
try:
|
||||||
# Start workload
|
# Start workload
|
||||||
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
|
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
|
||||||
|
time.sleep(5) # Give workload time to start
|
||||||
|
|
||||||
attach_detach = {
|
attach_detach = {
|
||||||
"attach_pid": process_workload.pid,
|
"attach_pid": process_workload.pid,
|
||||||
@@ -2316,8 +2318,9 @@ def test_live_attach_detach_block_thread_sleep(binary_handler_profile_rocprof_co
|
|||||||
try:
|
try:
|
||||||
# Start workload with sleep mode enabled
|
# Start workload with sleep mode enabled
|
||||||
process_workload = subprocess.Popen(
|
process_workload = subprocess.Popen(
|
||||||
[config["app_hip_dynamic_shared"], "--enable-sleep"], env=env
|
[*config["app_hip_dynamic_shared"], "--enable-sleep"], env=env
|
||||||
)
|
)
|
||||||
|
time.sleep(5) # Give workload time to start
|
||||||
|
|
||||||
attach_detach = {
|
attach_detach = {
|
||||||
"attach_pid": process_workload.pid,
|
"attach_pid": process_workload.pid,
|
||||||
@@ -2358,7 +2361,7 @@ def test_live_attach_detach_block_thread_sleep(binary_handler_profile_rocprof_co
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.live_attach_detach
|
@pytest.mark.live_attach_detach
|
||||||
def test_live_attach_detach_singlepath_launch_stats(
|
def test_live_attach_detach_singlepass_launch_stats(
|
||||||
binary_handler_profile_rocprof_compute,
|
binary_handler_profile_rocprof_compute,
|
||||||
):
|
):
|
||||||
options = ["--set", "launch_stats"]
|
options = ["--set", "launch_stats"]
|
||||||
@@ -2374,6 +2377,7 @@ def test_live_attach_detach_singlepath_launch_stats(
|
|||||||
try:
|
try:
|
||||||
# Start workload
|
# Start workload
|
||||||
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
|
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
|
||||||
|
time.sleep(5) # Give workload time to start
|
||||||
|
|
||||||
attach_detach = {
|
attach_detach = {
|
||||||
"attach_pid": process_workload.pid,
|
"attach_pid": process_workload.pid,
|
||||||
|
|||||||
@@ -242,6 +242,19 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
|||||||
## ROCprofiler-SDK 1.1.0 for ROCm release 7.2
|
## ROCprofiler-SDK 1.1.0 for ROCm release 7.2
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- Strix halo support for counter collection.
|
- Counter collection support for `gfx1150` and `gfx1151` (Strix Halo).
|
||||||
|
- HSA Extension API v8 support.
|
||||||
|
- `hipStreamCopyAttributes` API implementation.
|
||||||
|
|
||||||
|
### Optimized
|
||||||
|
|
||||||
|
- Improved process attachment and updated the corresponding [documentation](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/how-to/using-rocprofv3-process-attachment.html).
|
||||||
|
- Improved [Quick reference guide for rocprofv3] (https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/quick_guide.html).
|
||||||
|
- Updated installation documentation with links to the latest repository (https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/latest/install/installation.html).
|
||||||
|
|
||||||
|
### Resolved issues
|
||||||
|
- Fixed multi-GPU dimension mismatch.
|
||||||
|
- Fixed device lock issue for dispatch counters.
|
||||||
|
- Addressed OpenMP Tools task scheduling null pointer exception.
|
||||||
|
- Fixed stream ID errors arising during process attachment.
|
||||||
|
- Fixed issues arising during dynamic code object loading.
|
||||||
|
|||||||
@@ -1004,6 +1004,11 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipGetProcAddress_spt)
|
|||||||
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 20
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 20
|
||||||
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo)
|
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo)
|
||||||
#endif
|
#endif
|
||||||
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 21
|
||||||
|
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipExtDisableLogging)
|
||||||
|
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipExtEnableLogging)
|
||||||
|
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipExtSetLoggingParams)
|
||||||
|
#endif
|
||||||
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
|
||||||
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 442);
|
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 442);
|
||||||
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
|
||||||
@@ -1046,6 +1051,8 @@ static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 506);
|
|||||||
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 507);
|
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 507);
|
||||||
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
||||||
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 508);
|
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 508);
|
||||||
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 21
|
||||||
|
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 511);
|
||||||
#else
|
#else
|
||||||
# if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \
|
# if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \
|
||||||
(defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0)
|
(defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0)
|
||||||
|
|||||||
@@ -3377,6 +3377,26 @@ typedef union rocprofiler_hip_api_args_t
|
|||||||
size_t* paramSize;
|
size_t* paramSize;
|
||||||
} hipKernelGetParamInfo;
|
} hipKernelGetParamInfo;
|
||||||
#endif
|
#endif
|
||||||
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 21
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
// Empty struct has a size of 0 in C but size of 1 in C++.
|
||||||
|
// Add the rocprofiler_hip_api_no_args struct to fix this
|
||||||
|
rocprofiler_hip_api_no_args no_args;
|
||||||
|
} hipExtDisableLogging;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
// Empty struct has a size of 0 in C but size of 1 in C++.
|
||||||
|
// Add the rocprofiler_hip_api_no_args struct to fix this
|
||||||
|
rocprofiler_hip_api_no_args no_args;
|
||||||
|
} hipExtEnableLogging;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
size_t log_level;
|
||||||
|
size_t log_size;
|
||||||
|
size_t log_mask;
|
||||||
|
} hipExtSetLoggingParams;
|
||||||
|
#endif
|
||||||
} rocprofiler_hip_api_args_t;
|
} rocprofiler_hip_api_args_t;
|
||||||
|
|
||||||
ROCPROFILER_EXTERN_C_FINI
|
ROCPROFILER_EXTERN_C_FINI
|
||||||
|
|||||||
@@ -575,6 +575,11 @@ typedef enum rocprofiler_hip_runtime_api_id_t // NOLINT(performance-enum-size)
|
|||||||
#endif
|
#endif
|
||||||
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 20
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 20
|
||||||
ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo,
|
ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo,
|
||||||
|
#endif
|
||||||
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 21
|
||||||
|
ROCPROFILER_HIP_RUNTIME_API_ID_hipExtDisableLogging,
|
||||||
|
ROCPROFILER_HIP_RUNTIME_API_ID_hipExtEnableLogging,
|
||||||
|
ROCPROFILER_HIP_RUNTIME_API_ID_hipExtSetLoggingParams,
|
||||||
#endif
|
#endif
|
||||||
ROCPROFILER_HIP_RUNTIME_API_ID_LAST,
|
ROCPROFILER_HIP_RUNTIME_API_ID_LAST,
|
||||||
} rocprofiler_hip_runtime_api_id_t;
|
} rocprofiler_hip_runtime_api_id_t;
|
||||||
|
|||||||
@@ -623,6 +623,12 @@ ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipGetProcAddress_spt_fn, 506);
|
|||||||
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipKernelGetParamInfo_fn, 507);
|
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipKernelGetParamInfo_fn, 507);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 21
|
||||||
|
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipExtDisableLogging_fn, 508);
|
||||||
|
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipExtEnableLogging_fn, 509);
|
||||||
|
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipExtSetLoggingParams_fn, 510);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
|
||||||
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 442)
|
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 442)
|
||||||
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
|
||||||
@@ -665,6 +671,8 @@ ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 506)
|
|||||||
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 507)
|
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 507)
|
||||||
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 20
|
||||||
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 508)
|
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 508)
|
||||||
|
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 21
|
||||||
|
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 511)
|
||||||
#else
|
#else
|
||||||
INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 0)
|
INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 0)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -650,6 +650,11 @@ HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNT
|
|||||||
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo, hipKernelGetParamInfo, hipKernelGetParamInfo_fn, kernel, paramIndex, paramOffset, paramSize);
|
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipKernelGetParamInfo, hipKernelGetParamInfo, hipKernelGetParamInfo_fn, kernel, paramIndex, paramOffset, paramSize);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 21
|
||||||
|
HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipExtDisableLogging, hipExtDisableLogging, hipExtDisableLogging_fn);
|
||||||
|
HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipExtEnableLogging, hipExtEnableLogging, hipExtEnableLogging_fn);
|
||||||
|
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipExtSetLoggingParams, hipExtSetLoggingParams, hipExtSetLoggingParams_fn, log_level, log_size, log_mask);
|
||||||
|
#endif
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
#
|
#
|
||||||
# function for
|
# function for
|
||||||
#
|
#
|
||||||
@@ -142,11 +145,27 @@ function(rocprofiler_systems_causal_example_executable _NAME)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(
|
set(_TARGETS
|
||||||
TARGETS ${_NAME} ${_NAME}-rocprofsys ${_NAME}-coz
|
${_NAME}
|
||||||
DESTINATION bin
|
${_NAME}-rocprofsys
|
||||||
COMPONENT rocprofiler-systems-examples
|
${_NAME}-ndebug
|
||||||
OPTIONAL
|
${_NAME}-rocprofsys-ndebug
|
||||||
|
${_NAME}-coz
|
||||||
)
|
)
|
||||||
|
set(_EXISTING_TARGETS)
|
||||||
|
|
||||||
|
foreach(_TARGET IN LISTS _TARGETS)
|
||||||
|
if(TARGET ${_TARGET})
|
||||||
|
list(APPEND _EXISTING_TARGETS ${_TARGET})
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
if(_EXISTING_TARGETS)
|
||||||
|
install(
|
||||||
|
TARGETS ${_EXISTING_TARGETS}
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-code-coverage-example LANGUAGES CXX)
|
project(rocprofiler-systems-code-coverage-example LANGUAGES CXX)
|
||||||
@@ -22,7 +25,11 @@ target_link_libraries(code-coverage PRIVATE Threads::Threads)
|
|||||||
target_compile_options(code-coverage PRIVATE ${_FLAGS})
|
target_compile_options(code-coverage PRIVATE ${_FLAGS})
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(TARGETS code-coverage DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS code-coverage
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(PYTHON_FILES code-coverage.py)
|
set(PYTHON_FILES code-coverage.py)
|
||||||
@@ -42,7 +49,7 @@ if(Python3_FOUND)
|
|||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(
|
install(
|
||||||
PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE}
|
PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE}
|
||||||
DESTINATION bin
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-fork LANGUAGES CXX)
|
project(rocprofiler-systems-fork LANGUAGES CXX)
|
||||||
@@ -18,7 +21,13 @@ target_link_libraries(
|
|||||||
target_compile_options(fork-example PRIVATE ${_FLAGS})
|
target_compile_options(fork-example PRIVATE ${_FLAGS})
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(TARGETS fork-example DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
if(TARGET fork-example)
|
||||||
|
install(
|
||||||
|
TARGETS fork-example
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# HIP fork example (multi-process concurrency test)
|
# HIP fork example (multi-process concurrency test)
|
||||||
@@ -81,10 +90,10 @@ if(HIPCC_EXECUTABLE)
|
|||||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET hipMallocConcurrencyMproc)
|
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET hipMallocConcurrencyMproc)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET hipMallocConcurrencyMproc)
|
||||||
install(
|
install(
|
||||||
TARGETS hipMallocConcurrencyMproc
|
TARGETS hipMallocConcurrencyMproc
|
||||||
DESTINATION bin
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,25 +1,5 @@
|
|||||||
################################################################################
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc.
|
# SPDX-License-Identifier: MIT
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in all
|
|
||||||
# copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
# SOFTWARE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
@@ -157,11 +137,15 @@ if(HIP_FOUND AND rocjpeg_FOUND AND Threads_FOUND AND rocprofiler-register_FOUND)
|
|||||||
target_compile_options(jpegdecode PRIVATE ${_FLAGS})
|
target_compile_options(jpegdecode PRIVATE ${_FLAGS})
|
||||||
copy_image_files_and_make_copies()
|
copy_image_files_and_make_copies()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET jpegdecode)
|
||||||
install(TARGETS jpegdecode DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
|
||||||
install(
|
install(
|
||||||
FILES ${CMAKE_BINARY_DIR}/images
|
TARGETS jpegdecode
|
||||||
DESTINATION share/rocprofiler-systems/tests/images
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
install(
|
||||||
|
DIRECTORY ${CMAKE_BINARY_DIR}/images/
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples/images
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-lulesh-example LANGUAGES C CXX)
|
project(rocprofiler-systems-lulesh-example LANGUAGES C CXX)
|
||||||
@@ -82,16 +85,11 @@ rocprofiler_systems_causal_example_executable(
|
|||||||
INCLUDE_DIRECTORIES ${PROJECT_SOURCE_DIR}/includes
|
INCLUDE_DIRECTORIES ${PROJECT_SOURCE_DIR}/includes
|
||||||
)
|
)
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND LULESH_BUILD_KOKKOS)
|
||||||
if(LULESH_BUILD_KOKKOS)
|
install(
|
||||||
install(
|
TARGETS kokkoscore kokkoscontainers kokkossimd
|
||||||
TARGETS kokkoscore kokkoscontainers
|
LIBRARY
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples/lib
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
set_target_properties(
|
|
||||||
lulesh
|
|
||||||
PROPERTIES INSTALL_RPATH "\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}"
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-mpi-examples LANGUAGES C CXX)
|
project(rocprofiler-systems-mpi-examples LANGUAGES C CXX)
|
||||||
@@ -77,16 +80,24 @@ add_executable(mpi-example mpi.cpp)
|
|||||||
target_link_libraries(mpi-example PRIVATE mpi-cxx-interface-library)
|
target_link_libraries(mpi-example PRIVATE mpi-cxx-interface-library)
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(
|
set(MPI_EXAMPLES
|
||||||
TARGETS
|
mpi-example
|
||||||
mpi-example
|
mpi-allgather
|
||||||
mpi-allgather
|
mpi-bcast
|
||||||
mpi-bcast
|
mpi-all2all
|
||||||
mpi-all2all
|
mpi-reduce
|
||||||
mpi-reduce
|
mpi-scatter-gather
|
||||||
mpi-scatter-gather
|
mpi-send-recv
|
||||||
mpi-send-recv
|
mpi-allreduce
|
||||||
DESTINATION bin
|
|
||||||
COMPONENT rocprofiler-systems-examples
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
foreach(MPI_EXAMPLE IN LISTS MPI_EXAMPLES)
|
||||||
|
if(TARGET ${MPI_EXAMPLE})
|
||||||
|
install(
|
||||||
|
TARGETS ${MPI_EXAMPLE}
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -89,13 +89,17 @@ endif()
|
|||||||
target_link_libraries(openmp-cg PRIVATE openmp-common)
|
target_link_libraries(openmp-cg PRIVATE openmp-common)
|
||||||
target_link_libraries(openmp-lu PRIVATE openmp-common)
|
target_link_libraries(openmp-lu PRIVATE openmp-common)
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
set(OPENMP_EXAMPLES openmp-cg openmp-lu)
|
||||||
install(
|
|
||||||
TARGETS openmp-cg openmp-lu
|
foreach(OPENMP_EXAMPLE IN LISTS OPENMP_EXAMPLES)
|
||||||
DESTINATION bin
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET ${OPENMP_EXAMPLE})
|
||||||
COMPONENT rocprofiler-systems-examples
|
install(
|
||||||
)
|
TARGETS ${OPENMP_EXAMPLE}
|
||||||
endif()
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
set(DEFAULT_GPU_TARGETS
|
set(DEFAULT_GPU_TARGETS
|
||||||
"gfx900"
|
"gfx900"
|
||||||
|
|||||||
@@ -280,3 +280,19 @@ set(ROCPROFSYS_OMPVV_OFFLOAD_TESTS
|
|||||||
rocprofiler_systems_message(STATUS
|
rocprofiler_systems_message(STATUS
|
||||||
"Successfully configured OMPVV"
|
"Successfully configured OMPVV"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
|
foreach(
|
||||||
|
test_target
|
||||||
|
IN
|
||||||
|
LISTS ROCPROFSYS_OMPVV_HOST_TESTS ROCPROFSYS_OMPVV_OFFLOAD_TESTS
|
||||||
|
)
|
||||||
|
if(TARGET "${test_target}-build")
|
||||||
|
install(
|
||||||
|
PROGRAMS "${OMPVV_BIN_DEST}/${test_target}"
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ endif()
|
|||||||
# Use the directory that actually contains the library we found
|
# Use the directory that actually contains the library we found
|
||||||
get_filename_component(_rocm_llvm_lib "${LIBOMPTARGET_SO}" DIRECTORY)
|
get_filename_component(_rocm_llvm_lib "${LIBOMPTARGET_SO}" DIRECTORY)
|
||||||
set(_rocm_clang_lib "${ROCM_ROOT_DIR}/lib")
|
set(_rocm_clang_lib "${ROCM_ROOT_DIR}/lib")
|
||||||
set(_COMMON_RPATH "${_rocm_llvm_lib};${_rocm_clang_lib};$ORIGIN")
|
set(_COMMON_RPATH "${_rocm_llvm_lib};${_rocm_clang_lib};$ORIGIN;$ORIGIN/lib")
|
||||||
if(ROCmVersion_DIR)
|
if(ROCmVersion_DIR)
|
||||||
list(APPEND _COMMON_RPATH "${ROCmVersion_DIR}/llvm/lib")
|
list(APPEND _COMMON_RPATH "${ROCmVersion_DIR}/llvm/lib")
|
||||||
endif()
|
endif()
|
||||||
@@ -126,3 +126,15 @@ rocprofiler_systems_custom_compilation(
|
|||||||
rocprofiler_systems_custom_compilation(TARGET openmp-target
|
rocprofiler_systems_custom_compilation(TARGET openmp-target
|
||||||
COMPILER ${OMP_TARGET_COMPILER}
|
COMPILER ${OMP_TARGET_COMPILER}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
|
if(TARGET openmp-target AND TARGET openmp-target-lib)
|
||||||
|
install(
|
||||||
|
TARGETS openmp-target openmp-target-lib
|
||||||
|
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
LIBRARY
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples/lib
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-parallel-overhead-example LANGUAGES CXX)
|
project(rocprofiler-systems-parallel-overhead-example LANGUAGES CXX)
|
||||||
@@ -32,10 +35,13 @@ target_link_libraries(
|
|||||||
)
|
)
|
||||||
target_compile_definitions(parallel-overhead-locks PRIVATE USE_LOCKS=1)
|
target_compile_definitions(parallel-overhead-locks PRIVATE USE_LOCKS=1)
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
set(PARALLEL_OVERHEAD_EXAMPLES parallel-overhead parallel-overhead-locks)
|
||||||
install(
|
foreach(PARALLEL_OVERHEAD_EXAMPLE IN LISTS PARALLEL_OVERHEAD_EXAMPLES)
|
||||||
TARGETS parallel-overhead parallel-overhead-locks
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET ${PARALLEL_OVERHEAD_EXAMPLE})
|
||||||
DESTINATION bin
|
install(
|
||||||
COMPONENT rocprofiler-systems-examples
|
TARGETS ${PARALLEL_OVERHEAD_EXAMPLE}
|
||||||
)
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
endif()
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-python)
|
project(rocprofiler-systems-python)
|
||||||
@@ -36,7 +39,7 @@ if(Python3_FOUND)
|
|||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
install(
|
install(
|
||||||
PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE}
|
PROGRAMS ${PROJECT_BINARY_DIR}/${_FILE}
|
||||||
DESTINATION bin
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -138,6 +138,14 @@ if(hip_FOUND AND rccl_FOUND)
|
|||||||
add_dependencies(rccl-tests::${_EXE_NAME} copy-${_EXE_NAME})
|
add_dependencies(rccl-tests::${_EXE_NAME} copy-${_EXE_NAME})
|
||||||
|
|
||||||
list(APPEND _RCCL_TEST_TARGETS "rccl-tests::${_EXE_NAME}")
|
list(APPEND _RCCL_TEST_TARGETS "rccl-tests::${_EXE_NAME}")
|
||||||
|
|
||||||
|
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||||
|
install(
|
||||||
|
PROGRAMS ${_EXE_DEST_PATH}
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
set(RCCL_TEST_TARGETS "${_RCCL_TEST_TARGETS}" CACHE INTERNAL "rccl-test targets")
|
set(RCCL_TEST_TARGETS "${_RCCL_TEST_TARGETS}" CACHE INTERNAL "rccl-test targets")
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-rewrite-caller-example LANGUAGES CXX)
|
project(rocprofiler-systems-rewrite-caller-example LANGUAGES CXX)
|
||||||
@@ -18,6 +21,10 @@ set(CMAKE_BUILD_TYPE "Debug")
|
|||||||
add_executable(rewrite-caller rewrite-caller.cpp)
|
add_executable(rewrite-caller rewrite-caller.cpp)
|
||||||
target_compile_options(rewrite-caller PRIVATE ${_FLAGS})
|
target_compile_options(rewrite-caller PRIVATE ${_FLAGS})
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET rewrite-caller)
|
||||||
install(TARGETS rewrite-caller DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS rewrite-caller
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,24 +1,5 @@
|
|||||||
# MIT License
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
#
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
# THE SOFTWARE.
|
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
@@ -113,6 +94,10 @@ if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
|
|||||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx)
|
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET roctx)
|
||||||
install(TARGETS roctx DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS roctx
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -40,3 +40,11 @@ target_compile_options(tests-compile-options INTERFACE -g)
|
|||||||
add_executable(thread-limit thread-limit.cpp)
|
add_executable(thread-limit thread-limit.cpp)
|
||||||
target_compile_definitions(thread-limit PRIVATE MAX_THREADS=${ROCPROFSYS_MAX_THREADS})
|
target_compile_definitions(thread-limit PRIVATE MAX_THREADS=${ROCPROFSYS_MAX_THREADS})
|
||||||
target_link_libraries(thread-limit PRIVATE Threads::Threads tests-compile-options)
|
target_link_libraries(thread-limit PRIVATE Threads::Threads tests-compile-options)
|
||||||
|
|
||||||
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET thread-limit)
|
||||||
|
install(
|
||||||
|
TARGETS thread-limit
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-trace-time-window-example LANGUAGES CXX)
|
project(rocprofiler-systems-trace-time-window-example LANGUAGES CXX)
|
||||||
@@ -18,10 +21,10 @@ set(CMAKE_BUILD_TYPE "Debug")
|
|||||||
add_executable(trace-time-window trace-time-window.cpp)
|
add_executable(trace-time-window trace-time-window.cpp)
|
||||||
target_compile_options(trace-time-window PRIVATE ${_FLAGS})
|
target_compile_options(trace-time-window PRIVATE ${_FLAGS})
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET trace-time-window)
|
||||||
install(
|
install(
|
||||||
TARGETS trace-time-window
|
TARGETS trace-time-window
|
||||||
DESTINATION bin
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-transferBench-example LANGUAGES CXX)
|
project(rocprofiler-systems-transferBench-example LANGUAGES CXX)
|
||||||
@@ -120,6 +123,10 @@ if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
|
|||||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transferBench)
|
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transferBench)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET transferBench)
|
||||||
install(TARGETS transferBench DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS transferBench
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-transpose-example LANGUAGES CXX)
|
project(rocprofiler-systems-transpose-example LANGUAGES CXX)
|
||||||
@@ -93,6 +96,10 @@ if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
|
|||||||
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transpose)
|
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET transpose)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET transpose)
|
||||||
install(TARGETS transpose DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS transpose
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-user-api-example LANGUAGES CXX)
|
project(rocprofiler-systems-user-api-example LANGUAGES CXX)
|
||||||
@@ -24,6 +27,10 @@ target_link_libraries(
|
|||||||
PRIVATE Threads::Threads rocprofiler-systems::rocprofiler-systems-user-library
|
PRIVATE Threads::Threads rocprofiler-systems::rocprofiler-systems-user-library
|
||||||
)
|
)
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET user-api)
|
||||||
install(TARGETS user-api DESTINATION bin COMPONENT rocprofiler-systems-examples)
|
install(
|
||||||
|
TARGETS user-api
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
|
COMPONENT rocprofiler-systems-examples
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||||
|
|
||||||
project(rocprofiler-systems-videodecode-example LANGUAGES CXX)
|
project(rocprofiler-systems-videodecode-example LANGUAGES CXX)
|
||||||
@@ -176,15 +179,15 @@ if(FFMPEG_FOUND AND rocdecode_FOUND)
|
|||||||
target_compile_definitions(videodecode PUBLIC USE_AVCODEC_GREATER_THAN_58_134=1)
|
target_compile_definitions(videodecode PUBLIC USE_AVCODEC_GREATER_THAN_58_134=1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
if(ROCPROFSYS_INSTALL_EXAMPLES AND TARGET videodecode)
|
||||||
install(
|
install(
|
||||||
TARGETS videodecode
|
TARGETS videodecode
|
||||||
DESTINATION bin
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
install(
|
install(
|
||||||
FILES ${CMAKE_BINARY_DIR}/videos
|
DIRECTORY ${CMAKE_BINARY_DIR}/videos/
|
||||||
DESTINATION share/rocprofiler-systems/tests/videos
|
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/rocprofiler-systems/examples/videos
|
||||||
COMPONENT rocprofiler-systems-examples
|
COMPONENT rocprofiler-systems-examples
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -192,6 +192,10 @@ prepare_environment_for_run(parser_data_t& _data)
|
|||||||
rocprofsys::argparse::add_ld_preload(_data);
|
rocprofsys::argparse::add_ld_preload(_data);
|
||||||
rocprofsys::argparse::add_ld_library_path(_data);
|
rocprofsys::argparse::add_ld_library_path(_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rocprofsys::argparse::add_torch_library_path(_data, _data.verbose > 0);
|
||||||
|
|
||||||
|
rocprofsys::common::consolidate_env_entries(_data.current);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
@@ -933,3 +933,9 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
|||||||
|
|
||||||
return _outv;
|
return _outv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
add_torch_library_path(std::vector<char*>& envp, const std::vector<char*>& argv)
|
||||||
|
{
|
||||||
|
rocprofsys::common::add_torch_library_path(envp, argv, verbose > 0, updated_envs);
|
||||||
|
}
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ main(int argc, char** argv)
|
|||||||
_argv.emplace_back(argv[i]);
|
_argv.emplace_back(argv[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
add_torch_library_path(_env, _argv);
|
||||||
|
|
||||||
print_updated_environment(_env);
|
print_updated_environment(_env);
|
||||||
|
|
||||||
if(!_argv.empty())
|
if(!_argv.empty())
|
||||||
|
|||||||
@@ -35,3 +35,6 @@ get_initial_environment();
|
|||||||
|
|
||||||
std::vector<char*>
|
std::vector<char*>
|
||||||
parse_args(int argc, char** argv, std::vector<char*>& envp);
|
parse_args(int argc, char** argv, std::vector<char*>& envp);
|
||||||
|
|
||||||
|
void
|
||||||
|
add_torch_library_path(std::vector<char*>& envp, const std::vector<char*>& argv);
|
||||||
|
|||||||
@@ -26,9 +26,12 @@
|
|||||||
|
|
||||||
#include "common/join.hpp"
|
#include "common/join.hpp"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <numeric>
|
||||||
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
@@ -197,7 +200,7 @@ remove_env(std::vector<char*>& _environ, std::string_view _env_var,
|
|||||||
{
|
{
|
||||||
if(match(itr))
|
if(match(itr))
|
||||||
{
|
{
|
||||||
free(itr);
|
std::free(itr);
|
||||||
itr = nullptr;
|
itr = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -266,6 +269,113 @@ discover_llvm_libdir_for_ompt(bool verbose = false)
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool
|
||||||
|
is_python_interpreter(std::string_view executable)
|
||||||
|
{
|
||||||
|
if(executable.empty()) return false;
|
||||||
|
|
||||||
|
const auto slash_pos = executable.rfind('/');
|
||||||
|
const auto basename = (slash_pos != std::string_view::npos)
|
||||||
|
? executable.substr(slash_pos + 1)
|
||||||
|
: executable;
|
||||||
|
|
||||||
|
if(basename == "python" || basename == "python3") return true;
|
||||||
|
|
||||||
|
constexpr std::string_view python3_prefix = "python3.";
|
||||||
|
|
||||||
|
const bool has_valid_prefix =
|
||||||
|
basename.size() > python3_prefix.size() &&
|
||||||
|
basename.substr(0, python3_prefix.size()) == python3_prefix;
|
||||||
|
if(!has_valid_prefix) return false;
|
||||||
|
|
||||||
|
const auto version_digits = basename.substr(python3_prefix.size());
|
||||||
|
|
||||||
|
return std::all_of(version_digits.begin(), version_digits.end(),
|
||||||
|
[](unsigned char c) { return std::isdigit(c); });
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string
|
||||||
|
discover_torch_libpath(const std::string& python_binary, bool verbose = false)
|
||||||
|
{
|
||||||
|
if(python_binary.empty()) return {};
|
||||||
|
|
||||||
|
const auto is_safe_executable_path = [](const std::string& path) {
|
||||||
|
// Allow only a conservative set of characters in the executable path to
|
||||||
|
// avoid injection when used in a shell command.
|
||||||
|
for(unsigned char c : path)
|
||||||
|
{
|
||||||
|
if(std::isalnum(c) != 0) continue;
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
case '/':
|
||||||
|
case '.':
|
||||||
|
case '_':
|
||||||
|
case '-':
|
||||||
|
case '+': break;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
if(!is_safe_executable_path(python_binary))
|
||||||
|
{
|
||||||
|
ROCPROFSYS_ENVIRON_LOG(
|
||||||
|
verbose, "Unsafe characters detected in Python interpreter path: %s\n",
|
||||||
|
python_binary.c_str());
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto cmd = "\"" + python_binary +
|
||||||
|
"\" -c \"import torch; print(torch.__path__[0])\" 2>/dev/null";
|
||||||
|
|
||||||
|
FILE* pipe = popen(cmd.c_str(), "r");
|
||||||
|
if(!pipe)
|
||||||
|
{
|
||||||
|
ROCPROFSYS_ENVIRON_LOG(verbose, "Failed to execute command: %s\n", cmd.c_str());
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
char buffer[1024];
|
||||||
|
std::string result;
|
||||||
|
while(fgets(buffer, sizeof(buffer), pipe))
|
||||||
|
{
|
||||||
|
result.append(buffer);
|
||||||
|
// stop if we've read the full line (torch path is printed on a single line)
|
||||||
|
if(!result.empty() && result.back() == '\n') break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int status = pclose(pipe);
|
||||||
|
|
||||||
|
if(status != 0 || result.empty())
|
||||||
|
{
|
||||||
|
ROCPROFSYS_ENVIRON_LOG(verbose, "torch not found for Python interpreter: %s\n",
|
||||||
|
python_binary.c_str());
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
while(!result.empty() &&
|
||||||
|
(result.back() == '\n' || result.back() == '\r' || result.back() == ' '))
|
||||||
|
{
|
||||||
|
result.pop_back();
|
||||||
|
}
|
||||||
|
|
||||||
|
if(result.empty()) return {};
|
||||||
|
|
||||||
|
std::string torch_libdir = result + "/lib";
|
||||||
|
|
||||||
|
if(!::tim::filepath::direxists(torch_libdir))
|
||||||
|
{
|
||||||
|
ROCPROFSYS_ENVIRON_LOG(verbose, "torch lib directory does not exist: %s\n",
|
||||||
|
torch_libdir.c_str());
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
ROCPROFSYS_ENVIRON_LOG(verbose, "Discovered torch library path: %s\n",
|
||||||
|
torch_libdir.c_str());
|
||||||
|
return torch_libdir;
|
||||||
|
}
|
||||||
|
|
||||||
enum class update_mode : uint8_t
|
enum class update_mode : uint8_t
|
||||||
{
|
{
|
||||||
REPLACE = 0,
|
REPLACE = 0,
|
||||||
@@ -335,7 +445,7 @@ update_env(std::vector<char*>& _environ, std::string_view _env_var, Tp&& _env_va
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
free(itr);
|
std::free(itr);
|
||||||
itr = strdup(join('=', _env_var, _env_val_str).c_str());
|
itr = strdup(join('=', _env_var, _env_val_str).c_str());
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@@ -343,5 +453,145 @@ update_env(std::vector<char*>& _environ, std::string_view _env_var, Tp&& _env_va
|
|||||||
_environ.emplace_back(strdup(join('=', _env_var, _env_val_str).c_str()));
|
_environ.emplace_back(strdup(join('=', _env_var, _env_val_str).c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename UpdatedEnvsT>
|
||||||
|
inline void
|
||||||
|
add_torch_library_path(std::vector<char*>& envp, const std::vector<char*>& argv,
|
||||||
|
bool verbose, UpdatedEnvsT& updated_envs)
|
||||||
|
{
|
||||||
|
if(argv.empty() || argv.front() == nullptr) return;
|
||||||
|
if(!is_python_interpreter(argv.front())) return;
|
||||||
|
|
||||||
|
auto torch_libpath = discover_torch_libpath(argv.front(), verbose);
|
||||||
|
if(torch_libpath.empty()) return;
|
||||||
|
|
||||||
|
std::unordered_set<std::string> seen{ torch_libpath };
|
||||||
|
std::string result = torch_libpath;
|
||||||
|
|
||||||
|
constexpr std::string_view ld_prefix = "LD_LIBRARY_PATH=";
|
||||||
|
|
||||||
|
auto is_ld_path = [&](char* entry) {
|
||||||
|
return entry &&
|
||||||
|
std::string_view{ entry }.substr(0, ld_prefix.length()) == ld_prefix;
|
||||||
|
};
|
||||||
|
|
||||||
|
for(auto& entry : envp)
|
||||||
|
{
|
||||||
|
if(!is_ld_path(entry)) continue;
|
||||||
|
|
||||||
|
std::istringstream stream{ std::string{ entry + ld_prefix.length() } };
|
||||||
|
for(std::string path; std::getline(stream, path, ':');)
|
||||||
|
{
|
||||||
|
if(!path.empty() && seen.insert(path).second) result += ":" + path;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::free(entry);
|
||||||
|
entry = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
envp.erase(std::remove(envp.begin(), envp.end(), nullptr), envp.end());
|
||||||
|
envp.emplace_back(strdup(join("", ld_prefix, result).c_str()));
|
||||||
|
|
||||||
|
updated_envs.emplace(ld_prefix.substr(0, ld_prefix.length() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void
|
||||||
|
consolidate_env_entries(std::vector<char*>& envp)
|
||||||
|
{
|
||||||
|
constexpr char delim = ':';
|
||||||
|
|
||||||
|
struct key_data
|
||||||
|
{
|
||||||
|
std::vector<std::string> parts;
|
||||||
|
std::unordered_set<std::string> seen;
|
||||||
|
|
||||||
|
void add_unique(std::string part)
|
||||||
|
{
|
||||||
|
if(!part.empty() && seen.insert(part).second)
|
||||||
|
parts.emplace_back(std::move(part));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
auto parse_entry = [](std::string_view entry)
|
||||||
|
-> std::optional<std::pair<std::string_view, std::string_view>> {
|
||||||
|
auto eq_pos = entry.find('=');
|
||||||
|
if(eq_pos == std::string_view::npos) return std::nullopt;
|
||||||
|
return std::make_pair(entry.substr(0, eq_pos), entry.substr(eq_pos + 1));
|
||||||
|
};
|
||||||
|
|
||||||
|
auto join_parts = [delim](std::string_view key,
|
||||||
|
const std::vector<std::string>& parts) {
|
||||||
|
std::string result;
|
||||||
|
|
||||||
|
const auto total_parts_length = std::accumulate(
|
||||||
|
parts.begin(), parts.end(), std::size_t{ 0 },
|
||||||
|
[](std::size_t acc, const std::string& part) { return acc + part.size(); });
|
||||||
|
|
||||||
|
const auto delim_count = parts.size() - 1;
|
||||||
|
const auto equal_sign_length = 1;
|
||||||
|
|
||||||
|
result.reserve(key.size() + equal_sign_length + total_parts_length + delim_count);
|
||||||
|
result.append(key);
|
||||||
|
result += '=';
|
||||||
|
|
||||||
|
result =
|
||||||
|
std::accumulate(parts.begin(), parts.end(), std::move(result),
|
||||||
|
[delim, &parts](std::string acc, const std::string& part) {
|
||||||
|
if(part != parts.front()) acc += delim;
|
||||||
|
acc.append(part);
|
||||||
|
return acc;
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unordered_map<std::string_view, key_data> key_map;
|
||||||
|
std::vector<std::string_view> key_order;
|
||||||
|
|
||||||
|
for(auto* entry : envp)
|
||||||
|
{
|
||||||
|
if(!entry)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto parsed = parse_entry(entry);
|
||||||
|
if(!parsed)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto [key, value] = *parsed;
|
||||||
|
|
||||||
|
auto [it, inserted] = key_map.try_emplace(key);
|
||||||
|
if(inserted)
|
||||||
|
{
|
||||||
|
key_order.emplace_back(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& data = it->second;
|
||||||
|
std::istringstream stream{ std::string{ value } };
|
||||||
|
for(std::string part; std::getline(stream, part, delim);)
|
||||||
|
{
|
||||||
|
data.add_unique(part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<char*> result;
|
||||||
|
result.reserve(key_order.size());
|
||||||
|
|
||||||
|
for(auto key : key_order)
|
||||||
|
{
|
||||||
|
result.emplace_back(strdup(join_parts(key, key_map[key].parts).c_str()));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto* entry : envp)
|
||||||
|
{
|
||||||
|
std::free(entry);
|
||||||
|
entry = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
envp = std::move(result);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace rocprofsys
|
} // namespace rocprofsys
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ add_library(
|
|||||||
lib-common-tests
|
lib-common-tests
|
||||||
OBJECT
|
OBJECT
|
||||||
test_discover_llvm_libdir.cpp
|
test_discover_llvm_libdir.cpp
|
||||||
|
test_environment.cpp
|
||||||
test_path.cpp
|
test_path.cpp
|
||||||
test_remove_env.cpp
|
test_remove_env.cpp
|
||||||
test_update_env.cpp
|
test_update_env.cpp
|
||||||
|
|||||||
@@ -0,0 +1,146 @@
|
|||||||
|
// Copyright (c) Advanced Micro Devices, Inc.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#include "common/environment.hpp"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
using namespace rocprofsys::common;
|
||||||
|
|
||||||
|
class IsPythonInterpreterTest : public ::testing::Test
|
||||||
|
{};
|
||||||
|
|
||||||
|
TEST_F(IsPythonInterpreterTest, RecognizesPython)
|
||||||
|
{
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3.8"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3.9"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3.10"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3.11"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("python3.12"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("/usr/bin/python"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("/usr/bin/python3"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("/usr/bin/python3.10"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("/home/user/venv/bin/python"));
|
||||||
|
EXPECT_TRUE(is_python_interpreter("/opt/conda/bin/python3.11"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("bash"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("sh"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("ruby"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("node"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("java"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("/usr/bin/bash"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("./my_app"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("pythonista"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python_script.py"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("mypython"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python2"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python3."));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python3.a"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python3.10a"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("python3x10"));
|
||||||
|
EXPECT_FALSE(is_python_interpreter(""));
|
||||||
|
EXPECT_FALSE(is_python_interpreter("/usr/bin/"));
|
||||||
|
}
|
||||||
|
|
||||||
|
class DuplicatedEnvironmentEntriesTest : public ::testing::Test
|
||||||
|
{};
|
||||||
|
|
||||||
|
TEST_F(DuplicatedEnvironmentEntriesTest, DuplicateEnvironmentEntries)
|
||||||
|
{
|
||||||
|
std::vector<char*> env_vars = {
|
||||||
|
strdup("PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/bin2"),
|
||||||
|
strdup("PATH=/usr/local/bin:/usr/bin:/bin"),
|
||||||
|
};
|
||||||
|
|
||||||
|
consolidate_env_entries(env_vars);
|
||||||
|
|
||||||
|
ASSERT_EQ(env_vars.size(), 1);
|
||||||
|
EXPECT_STREQ(env_vars[0], "PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/bin2");
|
||||||
|
|
||||||
|
for(auto* entry : env_vars)
|
||||||
|
free(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DuplicatedEnvironmentEntriesTest, HandlesEmptyVector)
|
||||||
|
{
|
||||||
|
std::vector<char*> env_vars;
|
||||||
|
consolidate_env_entries(env_vars);
|
||||||
|
EXPECT_TRUE(env_vars.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DuplicatedEnvironmentEntriesTest, HandlesNullEntries)
|
||||||
|
{
|
||||||
|
std::vector<char*> env_vars = {
|
||||||
|
strdup("PATH=/usr/bin"),
|
||||||
|
nullptr,
|
||||||
|
strdup("PATH=/bin"),
|
||||||
|
};
|
||||||
|
consolidate_env_entries(env_vars);
|
||||||
|
ASSERT_EQ(env_vars.size(), 1);
|
||||||
|
EXPECT_STREQ(env_vars[0], "PATH=/usr/bin:/bin");
|
||||||
|
for(auto* entry : env_vars)
|
||||||
|
std::free(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DuplicatedEnvironmentEntriesTest, HandlesEmptyValues)
|
||||||
|
{
|
||||||
|
std::vector<char*> env_vars = {
|
||||||
|
strdup("EMPTY_VAR="),
|
||||||
|
strdup("PATH=/usr/bin"),
|
||||||
|
};
|
||||||
|
consolidate_env_entries(env_vars);
|
||||||
|
ASSERT_EQ(env_vars.size(), 2);
|
||||||
|
|
||||||
|
for(auto* entry : env_vars)
|
||||||
|
std::free(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
class AddTorchLibraryPathTest : public ::testing::Test
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
std::unordered_set<std::string> updated_envs;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(AddTorchLibraryPathTest, SkipsNonPythonExecutables)
|
||||||
|
{
|
||||||
|
std::vector<char*> envp = {
|
||||||
|
strdup("LD_LIBRARY_PATH=/usr/lib"),
|
||||||
|
};
|
||||||
|
std::vector<char*> argv = {
|
||||||
|
strdup("/usr/bin/bash"),
|
||||||
|
};
|
||||||
|
add_torch_library_path(envp, argv, false, updated_envs);
|
||||||
|
// Should not modify environment
|
||||||
|
ASSERT_EQ(envp.size(), 1);
|
||||||
|
EXPECT_STREQ(envp[0], "LD_LIBRARY_PATH=/usr/lib");
|
||||||
|
for(auto* entry : envp)
|
||||||
|
std::free(entry);
|
||||||
|
for(auto* entry : argv)
|
||||||
|
std::free(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(AddTorchLibraryPathTest, HandlesEmptyArgv)
|
||||||
|
{
|
||||||
|
std::vector<char*> envp = {
|
||||||
|
strdup("LD_LIBRARY_PATH=/usr/lib"),
|
||||||
|
};
|
||||||
|
std::vector<char*> argv;
|
||||||
|
add_torch_library_path(envp, argv, false, updated_envs);
|
||||||
|
ASSERT_EQ(envp.size(), 1);
|
||||||
|
EXPECT_STREQ(envp[0], "LD_LIBRARY_PATH=/usr/lib");
|
||||||
|
for(auto* entry : envp)
|
||||||
|
std::free(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(AddTorchLibraryPathTest, HandlesNullArgvFront)
|
||||||
|
{
|
||||||
|
std::vector<char*> envp = {
|
||||||
|
strdup("LD_LIBRARY_PATH=/usr/lib"),
|
||||||
|
};
|
||||||
|
std::vector<char*> argv = { nullptr };
|
||||||
|
add_torch_library_path(envp, argv, false, updated_envs);
|
||||||
|
ASSERT_EQ(envp.size(), 1);
|
||||||
|
for(auto* entry : envp)
|
||||||
|
std::free(entry);
|
||||||
|
}
|
||||||
@@ -168,6 +168,14 @@ add_ld_library_path(parser_data& _data)
|
|||||||
return _data;
|
return _data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parser_data&
|
||||||
|
add_torch_library_path(parser_data& _data, bool verbose)
|
||||||
|
{
|
||||||
|
rocprofsys::common::add_torch_library_path(_data.current, _data.command, verbose,
|
||||||
|
_data.updated);
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
|
||||||
parser_data&
|
parser_data&
|
||||||
add_core_arguments(parser_t& _parser, parser_data& _data)
|
add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -83,6 +83,9 @@ add_ld_preload(parser_data&);
|
|||||||
parser_data&
|
parser_data&
|
||||||
add_ld_library_path(parser_data&);
|
add_ld_library_path(parser_data&);
|
||||||
|
|
||||||
|
parser_data&
|
||||||
|
add_torch_library_path(parser_data&, bool verbose = false);
|
||||||
|
|
||||||
parser_data&
|
parser_data&
|
||||||
add_core_arguments(parser_t&, parser_data&);
|
add_core_arguments(parser_t&, parser_data&);
|
||||||
|
|
||||||
|
|||||||
@@ -316,7 +316,9 @@ configure_settings(bool _init)
|
|||||||
"backend", "perfetto");
|
"backend", "perfetto");
|
||||||
|
|
||||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY",
|
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY",
|
||||||
"Use legacy direct mode for perfetto tracing instead of "
|
"[DEPRECATED] The new default option is to use data from "
|
||||||
|
"cached buffer. When set to true system will use "
|
||||||
|
"legacy direct mode for perfetto tracing instead of "
|
||||||
"deferred trace generation. When false (default), uses "
|
"deferred trace generation. When false (default), uses "
|
||||||
"cached mode with minimal runtime overhead.",
|
"cached mode with minimal runtime overhead.",
|
||||||
false, "backend", "perfetto");
|
false, "backend", "perfetto");
|
||||||
@@ -1088,6 +1090,9 @@ configure_settings(bool _init)
|
|||||||
handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE");
|
handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE");
|
||||||
handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE");
|
handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE");
|
||||||
handle_deprecated_setting("ROCPROFSYS_USE_TIMEMORY", "ROCPROFSYS_PROFILE");
|
handle_deprecated_setting("ROCPROFSYS_USE_TIMEMORY", "ROCPROFSYS_PROFILE");
|
||||||
|
handle_deprecated_setting("ROCPROFSYS_DEBUG", "ROCPROFSYS_LOG_LEVEL");
|
||||||
|
handle_deprecated_setting("ROCPROFSYS_VERBOSE", "ROCPROFSYS_LOG_LEVEL");
|
||||||
|
handle_deprecated_setting("ROCPROFSYS_TRACE_LEGACY", "ROCPROFSYS_TRACE");
|
||||||
|
|
||||||
scope::get_fields()[scope::flat::value] = _config->get_flat_profile();
|
scope::get_fields()[scope::flat::value] = _config->get_flat_profile();
|
||||||
scope::get_fields()[scope::timeline::value] = _config->get_timeline_profile();
|
scope::get_fields()[scope::timeline::value] = _config->get_timeline_profile();
|
||||||
|
|||||||
@@ -31,6 +31,8 @@
|
|||||||
} \
|
} \
|
||||||
} // namespace ::tim::cereal
|
} // namespace ::tim::cereal
|
||||||
|
|
||||||
|
#include "common/defines.h"
|
||||||
|
|
||||||
#if !defined(ROCPROFSYS_USE_ROCM)
|
#if !defined(ROCPROFSYS_USE_ROCM)
|
||||||
# define ROCPROFSYS_USE_ROCM 0
|
# define ROCPROFSYS_USE_ROCM 0
|
||||||
#endif
|
#endif
|
||||||
@@ -40,7 +42,6 @@
|
|||||||
|
|
||||||
#include <timemory/manager.hpp>
|
#include <timemory/manager.hpp>
|
||||||
|
|
||||||
#include <dlfcn.h>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "core/agent_manager.hpp"
|
#include "core/agent_manager.hpp"
|
||||||
@@ -92,17 +93,6 @@ _amdsmi_is_initialized()
|
|||||||
return initialized;
|
return initialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
prevent_amdsmi_library_unload()
|
|
||||||
{
|
|
||||||
static bool _initialized = false;
|
|
||||||
if(_initialized) return;
|
|
||||||
_initialized = true;
|
|
||||||
|
|
||||||
dlopen("libamd_smi.so", RTLD_NOW | RTLD_NOLOAD | RTLD_NODELETE);
|
|
||||||
dlopen("librocm_smi64.so", RTLD_NOW | RTLD_NOLOAD | RTLD_NODELETE);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
amdsmi_init()
|
amdsmi_init()
|
||||||
{
|
{
|
||||||
@@ -113,8 +103,6 @@ amdsmi_init()
|
|||||||
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
|
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
|
||||||
get_processor_handles();
|
get_processor_handles();
|
||||||
_amdsmi_is_initialized() = true; // Mark as initialized
|
_amdsmi_is_initialized() = true; // Mark as initialized
|
||||||
|
|
||||||
prevent_amdsmi_library_unload();
|
|
||||||
} catch(std::exception& _e)
|
} catch(std::exception& _e)
|
||||||
{
|
{
|
||||||
LOG_ERROR("Exception thrown initializing amd-smi: {}", _e.what());
|
LOG_ERROR("Exception thrown initializing amd-smi: {}", _e.what());
|
||||||
|
|||||||
@@ -367,7 +367,6 @@ config_settings(const std::shared_ptr<settings>& _config)
|
|||||||
|
|
||||||
_skip_domains.emplace("kernel_dispatch");
|
_skip_domains.emplace("kernel_dispatch");
|
||||||
_skip_domains.emplace("page_migration");
|
_skip_domains.emplace("page_migration");
|
||||||
_skip_domains.emplace("scratch_memory");
|
|
||||||
|
|
||||||
_add_operation_settings(
|
_add_operation_settings(
|
||||||
"MARKER_API", callback_tracing_info[ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API],
|
"MARKER_API", callback_tracing_info[ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API],
|
||||||
@@ -652,7 +651,7 @@ get_backtrace_operations(rocprofiler_callback_tracing_kind_t kindv)
|
|||||||
{
|
{
|
||||||
if(callback_operation_option_names.count(kindv) == 0)
|
if(callback_operation_option_names.count(kindv) == 0)
|
||||||
{
|
{
|
||||||
LOG_CRITICAL("callback_operation_operation_names does not have value for {}",
|
LOG_CRITICAL("callback_operation_option_names does not have value for {}",
|
||||||
static_cast<int>(kindv));
|
static_cast<int>(kindv));
|
||||||
::rocprofsys::set_state(::rocprofsys::State::Finalized);
|
::rocprofsys::set_state(::rocprofsys::State::Finalized);
|
||||||
std::abort();
|
std::abort();
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ using storage_parser_t =
|
|||||||
storage_parser<type_identifier_t, kernel_dispatch_sample, memory_copy_sample,
|
storage_parser<type_identifier_t, kernel_dispatch_sample, memory_copy_sample,
|
||||||
memory_allocate_sample, region_sample, in_time_sample,
|
memory_allocate_sample, region_sample, in_time_sample,
|
||||||
pmc_event_with_sample, amd_smi_sample, cpu_freq_sample,
|
pmc_event_with_sample, amd_smi_sample, cpu_freq_sample,
|
||||||
backtrace_region_sample>;
|
backtrace_region_sample, scratch_memory_sample>;
|
||||||
|
|
||||||
using buffer_storage_t = buffer_storage<flush_worker_factory_t, type_identifier_t>;
|
using buffer_storage_t = buffer_storage<flush_worker_factory_t, type_identifier_t>;
|
||||||
|
|
||||||
|
|||||||
@@ -540,6 +540,72 @@ perfetto_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
perfetto_processor_t::handle([[maybe_unused]] const scratch_memory_sample& _sms)
|
||||||
|
{
|
||||||
|
#if ROCPROFSYS_USE_ROCM > 0
|
||||||
|
auto _corr_id = _sms.correlation_id_internal;
|
||||||
|
auto _stream_id = _sms.stream_handle;
|
||||||
|
auto _queue_id_handle = _sms.queue_id_handle;
|
||||||
|
const auto& _t_info = thread_info::get(_sms.thread_id, SystemTID);
|
||||||
|
const auto _thread_id_sequent = _t_info->index_data->sequent_value;
|
||||||
|
auto _beg_ts = _sms.start_timestamp;
|
||||||
|
auto _end_ts = _sms.end_timestamp;
|
||||||
|
|
||||||
|
auto _agent_device_id =
|
||||||
|
m_agent_manager.get_agent_by_handle(_sms.agent_id_handle).device_type_index;
|
||||||
|
auto _name = std::string{ m_metadata.get_buffer_name_info().at(
|
||||||
|
static_cast<rocprofiler_buffer_tracing_kind_t>(_sms.kind),
|
||||||
|
static_cast<rocprofiler_tracing_operation_t>(_sms.operation)) };
|
||||||
|
|
||||||
|
// Scratch memory samples from SDK versions prior to 7.0.2 do not include
|
||||||
|
// allocation_size field, so counter tracks are not needed
|
||||||
|
# if ROCPROFSYS_ROCM_VERSION >= 70002
|
||||||
|
using counter_track =
|
||||||
|
perfetto_counter_track<rocprofiler_buffer_tracing_scratch_memory_record_t>;
|
||||||
|
|
||||||
|
if(!counter_track::exists(_agent_device_id))
|
||||||
|
{
|
||||||
|
auto _track_desc_alloc_size = JOIN("", "GPU Scratch Memory [", _agent_device_id,
|
||||||
|
"] Thread ", _thread_id_sequent);
|
||||||
|
counter_track::emplace(_agent_device_id, _track_desc_alloc_size, "bytes");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(_sms.operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC)
|
||||||
|
{
|
||||||
|
TRACE_COUNTER("rocm_scratch_memory", counter_track::at(_agent_device_id, 0),
|
||||||
|
_beg_ts, _sms.allocation_size);
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
auto _track_desc_events = [&]() {
|
||||||
|
return JOIN("", "GPU Scratch Memory Events Thread ", _thread_id_sequent);
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto _track =
|
||||||
|
tracing::get_perfetto_track(category::rocm_scratch_memory{}, _track_desc_events);
|
||||||
|
|
||||||
|
auto add_perfetto_annotations = [&](::perfetto::EventContext ctx) {
|
||||||
|
if(!m_use_annotations) return;
|
||||||
|
|
||||||
|
annotate_perfetto(ctx, { { "begin_ns", _beg_ts },
|
||||||
|
{ "end_ns", _end_ts },
|
||||||
|
{ "corr_id", _corr_id },
|
||||||
|
{ "stream_id", _stream_id },
|
||||||
|
{ "queue", _queue_id_handle },
|
||||||
|
{ "allocation_size", _sms.allocation_size },
|
||||||
|
{ "agent_id", _agent_device_id },
|
||||||
|
{ "operation", _name },
|
||||||
|
{ "flags", _sms.flags } });
|
||||||
|
};
|
||||||
|
|
||||||
|
tracing::push_perfetto(category::rocm_scratch_memory{}, _name.c_str(), _track,
|
||||||
|
_beg_ts, ::perfetto::Flow::ProcessScoped(_corr_id),
|
||||||
|
add_perfetto_annotations);
|
||||||
|
tracing::pop_perfetto(category::rocm_scratch_memory{}, "", _track, _end_ts);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
perfetto_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
|
perfetto_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ public:
|
|||||||
void finalize_processing();
|
void finalize_processing();
|
||||||
|
|
||||||
void handle(const kernel_dispatch_sample& sample);
|
void handle(const kernel_dispatch_sample& sample);
|
||||||
|
void handle(const scratch_memory_sample& sample);
|
||||||
void handle(const memory_copy_sample& sample);
|
void handle(const memory_copy_sample& sample);
|
||||||
void handle(const memory_allocate_sample& sample);
|
void handle(const memory_allocate_sample& sample);
|
||||||
void handle(const region_sample& sample);
|
void handle(const region_sample& sample);
|
||||||
|
|||||||
@@ -66,6 +66,37 @@ get_handle_from_code_object(
|
|||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ROCPROFSYS_USE_ROCM > 0
|
||||||
|
using memory_operation = std::string;
|
||||||
|
using memory_type = std::string;
|
||||||
|
std::pair<memory_operation, memory_type>
|
||||||
|
parse_memory_operation_name(std::string_view memory_operation_name)
|
||||||
|
{
|
||||||
|
static const std::unordered_map<std::string_view,
|
||||||
|
std::pair<memory_operation, memory_type>>
|
||||||
|
parsing_map{
|
||||||
|
{ "MEMORY_ALLOCATION_NONE", { "NONE", "REAL" } },
|
||||||
|
{ "MEMORY_ALLOCATION_ALLOCATE", { "ALLOC", "REAL" } },
|
||||||
|
{ "MEMORY_ALLOCATION_VMEM_ALLOCATE", { "ALLOC", "VIRTUAL" } },
|
||||||
|
{ "MEMORY_ALLOCATION_FREE", { "FREE", "REAL" } },
|
||||||
|
{ "MEMORY_ALLOCATION_VMEM_FREE", { "FREE", "VIRTUAL" } },
|
||||||
|
{ "SCRATCH_MEMORY_NONE", { "NONE", "SCRATCH" } },
|
||||||
|
{ "SCRATCH_MEMORY_ALLOC", { "ALLOC", "SCRATCH" } },
|
||||||
|
{ "SCRATCH_MEMORY_FREE", { "FREE", "SCRATCH" } },
|
||||||
|
{ "SCRATCH_MEMORY_ASYNC_RECLAIM", { "ASYNC_RECLAIM", "SCRATCH" } },
|
||||||
|
};
|
||||||
|
|
||||||
|
auto item = parsing_map.find(memory_operation_name);
|
||||||
|
if(item == parsing_map.end())
|
||||||
|
{
|
||||||
|
LOG_WARNING("Unknown memory operation name: {}", memory_operation_name);
|
||||||
|
return { "UNKNOWN", "UNKNOWN" };
|
||||||
|
}
|
||||||
|
|
||||||
|
return item->second;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -110,6 +141,46 @@ rocpd_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
rocpd_processor_t::handle([[maybe_unused]] const scratch_memory_sample& _sms)
|
||||||
|
{
|
||||||
|
#if ROCPROFSYS_USE_ROCM > 0
|
||||||
|
auto& n_info = node_info::get_instance();
|
||||||
|
auto process = m_metadata->get_process_info();
|
||||||
|
|
||||||
|
const auto* _name = m_metadata->get_buffer_name_info().at(
|
||||||
|
static_cast<rocprofiler_buffer_tracing_kind_t>(_sms.kind),
|
||||||
|
static_cast<rocprofiler_tracing_operation_t>(_sms.operation));
|
||||||
|
|
||||||
|
auto agent_primary_key =
|
||||||
|
m_agent_manager->get_agent_by_handle(_sms.agent_id_handle).base_id;
|
||||||
|
|
||||||
|
auto thread_primary_key =
|
||||||
|
m_data_processor->map_thread_id_to_primary_key(_sms.thread_id);
|
||||||
|
|
||||||
|
auto category_primary_key = m_data_processor->insert_string(
|
||||||
|
trait::name<category::rocm_scratch_memory>::value);
|
||||||
|
|
||||||
|
auto stack_id = _sms.correlation_id_internal;
|
||||||
|
auto parent_stack_id = _sms.correlation_id_ancestor;
|
||||||
|
auto correlation_id = 0;
|
||||||
|
auto address_value = 0;
|
||||||
|
|
||||||
|
auto event_primary_key = m_data_processor->insert_event(
|
||||||
|
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||||
|
|
||||||
|
auto [memory_operation, memory_type] = parse_memory_operation_name(_name);
|
||||||
|
|
||||||
|
auto extdata_json_str = JOIN("", "{\"flags\": ", _sms.flags, "}");
|
||||||
|
|
||||||
|
m_data_processor->insert_memory_alloc(
|
||||||
|
n_info.id, process.pid, thread_primary_key, agent_primary_key,
|
||||||
|
memory_operation.c_str(), memory_type.c_str(), _sms.start_timestamp,
|
||||||
|
_sms.end_timestamp, address_value, _sms.allocation_size, _sms.queue_id_handle,
|
||||||
|
_sms.stream_handle, event_primary_key, extdata_json_str.c_str());
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
rocpd_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
|
rocpd_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
|
||||||
{
|
{
|
||||||
@@ -153,46 +224,6 @@ void
|
|||||||
rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
|
rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
|
||||||
{
|
{
|
||||||
#if ROCPROFSYS_USE_ROCM > 0 && (ROCPROFILER_VERSION >= 600)
|
#if ROCPROFSYS_USE_ROCM > 0 && (ROCPROFILER_VERSION >= 600)
|
||||||
static auto memtype_to_db =
|
|
||||||
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
|
|
||||||
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
|
|
||||||
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
|
|
||||||
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
|
|
||||||
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
|
|
||||||
|
|
||||||
std::string _type;
|
|
||||||
std::string _level;
|
|
||||||
if(memory_type.find(MEMORY_PREFIX) == 0)
|
|
||||||
{
|
|
||||||
_type = memory_type.substr(MEMORY_PREFIX.length());
|
|
||||||
if(_type.find(VMEM_PREFIX) == 0)
|
|
||||||
{
|
|
||||||
_type = _type.substr(VMEM_PREFIX.length());
|
|
||||||
_level = "VIRTUAL";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_level = "REAL";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if(memory_type.find(SCRATCH_PREFIX) == 0)
|
|
||||||
{
|
|
||||||
_type = memory_type.substr(SCRATCH_PREFIX.length());
|
|
||||||
_level = "SCRATCH";
|
|
||||||
if(memory_type.find(ASYNC_PREFIX) == 0)
|
|
||||||
{
|
|
||||||
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(_type == "ALLOCATE")
|
|
||||||
{
|
|
||||||
_type = "ALLOC";
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_pair(_type, _level);
|
|
||||||
};
|
|
||||||
|
|
||||||
auto& n_info = node_info::get_instance();
|
auto& n_info = node_info::get_instance();
|
||||||
auto process = m_metadata->get_process_info();
|
auto process = m_metadata->get_process_info();
|
||||||
auto thread_primary_key =
|
auto thread_primary_key =
|
||||||
@@ -210,7 +241,7 @@ rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
|
|||||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
|
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
|
||||||
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
|
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
|
||||||
|
|
||||||
auto [type, level] = memtype_to_db(_name);
|
auto [memory_operation, memory_type] = parse_memory_operation_name(_name);
|
||||||
|
|
||||||
auto stack_id = _mas.correlation_id_internal;
|
auto stack_id = _mas.correlation_id_internal;
|
||||||
auto parent_stack_id = _mas.correlation_id_ancestor;
|
auto parent_stack_id = _mas.correlation_id_ancestor;
|
||||||
@@ -224,9 +255,10 @@ rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
|
|||||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||||
|
|
||||||
m_data_processor->insert_memory_alloc(
|
m_data_processor->insert_memory_alloc(
|
||||||
n_info.id, process.pid, thread_primary_key, agent_primary_key, type.c_str(),
|
n_info.id, process.pid, thread_primary_key, agent_primary_key,
|
||||||
level.c_str(), _mas.start_timestamp, _mas.end_timestamp, _mas.address_value,
|
memory_operation.c_str(), memory_type.c_str(), _mas.start_timestamp,
|
||||||
_mas.allocation_size, queue_id, _mas.stream_handle, event_primary_key);
|
_mas.end_timestamp, _mas.address_value, _mas.allocation_size, queue_id,
|
||||||
|
_mas.stream_handle, event_primary_key);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ public:
|
|||||||
void finalize_processing();
|
void finalize_processing();
|
||||||
|
|
||||||
void handle(const kernel_dispatch_sample& sample);
|
void handle(const kernel_dispatch_sample& sample);
|
||||||
|
void handle(const scratch_memory_sample& sample);
|
||||||
void handle(const memory_copy_sample& sample);
|
void handle(const memory_copy_sample& sample);
|
||||||
void handle(const memory_allocate_sample& sample);
|
void handle(const memory_allocate_sample& sample);
|
||||||
void handle(const region_sample& sample);
|
void handle(const region_sample& sample);
|
||||||
|
|||||||
@@ -43,6 +43,11 @@ struct processor_t
|
|||||||
static_cast<T*>(this)->handle(sample);
|
static_cast<T*>(this)->handle(sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void handle(const scratch_memory_sample& sample)
|
||||||
|
{
|
||||||
|
static_cast<T*>(this)->handle(sample);
|
||||||
|
}
|
||||||
|
|
||||||
void handle(const memory_copy_sample& sample)
|
void handle(const memory_copy_sample& sample)
|
||||||
{
|
{
|
||||||
static_cast<T*>(this)->handle(sample);
|
static_cast<T*>(this)->handle(sample);
|
||||||
@@ -84,6 +89,7 @@ protected:
|
|||||||
struct processor_view_t
|
struct processor_view_t
|
||||||
{
|
{
|
||||||
using kernel_dispatch_fn_t = void (*)(void*, const kernel_dispatch_sample&) noexcept;
|
using kernel_dispatch_fn_t = void (*)(void*, const kernel_dispatch_sample&) noexcept;
|
||||||
|
using scratch_memory_fn_t = void (*)(void*, const scratch_memory_sample&) noexcept;
|
||||||
using memory_copy_fn_t = void (*)(void*, const memory_copy_sample&) noexcept;
|
using memory_copy_fn_t = void (*)(void*, const memory_copy_sample&) noexcept;
|
||||||
#if(ROCPROFILER_VERSION >= 600)
|
#if(ROCPROFILER_VERSION >= 600)
|
||||||
using memory_allocate_fn_t = void (*)(void*, const memory_allocate_sample&) noexcept;
|
using memory_allocate_fn_t = void (*)(void*, const memory_allocate_sample&) noexcept;
|
||||||
@@ -101,6 +107,7 @@ struct processor_view_t
|
|||||||
struct vtable_t
|
struct vtable_t
|
||||||
{
|
{
|
||||||
kernel_dispatch_fn_t handle_kernel_dispatch;
|
kernel_dispatch_fn_t handle_kernel_dispatch;
|
||||||
|
scratch_memory_fn_t handle_scratch_memory;
|
||||||
memory_copy_fn_t handle_memory_copy;
|
memory_copy_fn_t handle_memory_copy;
|
||||||
#if(ROCPROFILER_VERSION >= 600)
|
#if(ROCPROFILER_VERSION >= 600)
|
||||||
memory_allocate_fn_t handle_memory_allocate;
|
memory_allocate_fn_t handle_memory_allocate;
|
||||||
@@ -134,6 +141,11 @@ struct processor_view_t
|
|||||||
m_vtable->handle_kernel_dispatch(m_object, sample);
|
m_vtable->handle_kernel_dispatch(m_object, sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ROCPROFSYS_INLINE void handle(const scratch_memory_sample& sample) const noexcept
|
||||||
|
{
|
||||||
|
m_vtable->handle_scratch_memory(m_object, sample);
|
||||||
|
}
|
||||||
|
|
||||||
ROCPROFSYS_INLINE void handle(const memory_copy_sample& sample) const noexcept
|
ROCPROFSYS_INLINE void handle(const memory_copy_sample& sample) const noexcept
|
||||||
{
|
{
|
||||||
m_vtable->handle_memory_copy(m_object, sample);
|
m_vtable->handle_memory_copy(m_object, sample);
|
||||||
@@ -194,6 +206,9 @@ private:
|
|||||||
+[](void* obj, const kernel_dispatch_sample& sample) noexcept {
|
+[](void* obj, const kernel_dispatch_sample& sample) noexcept {
|
||||||
static_cast<T*>(obj)->handle(sample);
|
static_cast<T*>(obj)->handle(sample);
|
||||||
},
|
},
|
||||||
|
+[](void* obj, const scratch_memory_sample& sample) noexcept {
|
||||||
|
static_cast<T*>(obj)->handle(sample);
|
||||||
|
},
|
||||||
+[](void* obj, const memory_copy_sample& sample) noexcept {
|
+[](void* obj, const memory_copy_sample& sample) noexcept {
|
||||||
static_cast<T*>(obj)->handle(sample);
|
static_cast<T*>(obj)->handle(sample);
|
||||||
},
|
},
|
||||||
@@ -275,6 +290,9 @@ struct sample_processor_t
|
|||||||
case type_identifier_t::kernel_dispatch:
|
case type_identifier_t::kernel_dispatch:
|
||||||
handle_sample(static_cast<const kernel_dispatch_sample&>(sample));
|
handle_sample(static_cast<const kernel_dispatch_sample&>(sample));
|
||||||
break;
|
break;
|
||||||
|
case type_identifier_t::scratch_memory:
|
||||||
|
handle_sample(static_cast<const scratch_memory_sample&>(sample));
|
||||||
|
break;
|
||||||
case type_identifier_t::memory_copy:
|
case type_identifier_t::memory_copy:
|
||||||
handle_sample(static_cast<const memory_copy_sample&>(sample));
|
handle_sample(static_cast<const memory_copy_sample&>(sample));
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ enum class type_identifier_t : uint32_t
|
|||||||
amd_smi_sample = 0x0006,
|
amd_smi_sample = 0x0006,
|
||||||
cpu_freq_sample = 0x0007,
|
cpu_freq_sample = 0x0007,
|
||||||
backtrace_region_sample = 0x0008,
|
backtrace_region_sample = 0x0008,
|
||||||
|
scratch_memory = 0x0009,
|
||||||
fragmented_space = 0xFFFF
|
fragmented_space = 0xFFFF
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -148,6 +149,83 @@ get_size(const kernel_dispatch_sample& item)
|
|||||||
item.grid_size_z, static_cast<uint64_t>(item.stream_handle));
|
item.grid_size_z, static_cast<uint64_t>(item.stream_handle));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct scratch_memory_sample : cacheable_t
|
||||||
|
{
|
||||||
|
static constexpr type_identifier_t type_identifier =
|
||||||
|
type_identifier_t::scratch_memory;
|
||||||
|
|
||||||
|
scratch_memory_sample() = default;
|
||||||
|
scratch_memory_sample(uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||||
|
uint64_t _thread_id, uint64_t _agent_id_handle,
|
||||||
|
uint64_t _queue_id_handle, int32_t _kind, int32_t _operation,
|
||||||
|
int32_t _flags, uint64_t _allocation_size,
|
||||||
|
uint64_t _correlation_id_internal,
|
||||||
|
uint64_t _correlation_id_ancestor, size_t _stream_handle)
|
||||||
|
: start_timestamp(_start_timestamp)
|
||||||
|
, end_timestamp(_end_timestamp)
|
||||||
|
, thread_id(_thread_id)
|
||||||
|
, agent_id_handle(_agent_id_handle)
|
||||||
|
, queue_id_handle(_queue_id_handle)
|
||||||
|
, kind(_kind)
|
||||||
|
, operation(_operation)
|
||||||
|
, flags(_flags)
|
||||||
|
, allocation_size(_allocation_size)
|
||||||
|
, correlation_id_internal(_correlation_id_internal)
|
||||||
|
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||||
|
, stream_handle(_stream_handle)
|
||||||
|
{}
|
||||||
|
|
||||||
|
uint64_t start_timestamp;
|
||||||
|
uint64_t end_timestamp;
|
||||||
|
uint64_t thread_id;
|
||||||
|
uint64_t agent_id_handle;
|
||||||
|
uint64_t queue_id_handle;
|
||||||
|
int32_t kind;
|
||||||
|
int32_t operation;
|
||||||
|
int32_t flags;
|
||||||
|
uint64_t allocation_size;
|
||||||
|
uint64_t correlation_id_internal;
|
||||||
|
uint64_t correlation_id_ancestor;
|
||||||
|
size_t stream_handle;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void
|
||||||
|
serialize(uint8_t* buffer, const scratch_memory_sample& item)
|
||||||
|
{
|
||||||
|
utility::store_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||||
|
item.agent_id_handle, item.queue_id_handle, item.kind,
|
||||||
|
item.operation, item.flags, item.allocation_size,
|
||||||
|
item.correlation_id_internal, item.correlation_id_ancestor,
|
||||||
|
static_cast<uint64_t>(item.stream_handle));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline scratch_memory_sample
|
||||||
|
deserialize(uint8_t*& buffer)
|
||||||
|
{
|
||||||
|
scratch_memory_sample item;
|
||||||
|
uint64_t stream_handle;
|
||||||
|
utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||||
|
item.agent_id_handle, item.queue_id_handle, item.kind,
|
||||||
|
item.operation, item.flags, item.allocation_size,
|
||||||
|
item.correlation_id_internal, item.correlation_id_ancestor,
|
||||||
|
stream_handle);
|
||||||
|
item.stream_handle = stream_handle;
|
||||||
|
return item;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline size_t
|
||||||
|
get_size(const scratch_memory_sample& item)
|
||||||
|
{
|
||||||
|
return utility::get_size(item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||||
|
item.agent_id_handle, item.queue_id_handle, item.kind,
|
||||||
|
item.operation, item.flags, item.allocation_size,
|
||||||
|
item.correlation_id_internal, item.correlation_id_ancestor,
|
||||||
|
static_cast<uint64_t>(item.stream_handle));
|
||||||
|
}
|
||||||
|
|
||||||
struct memory_copy_sample : cacheable_t
|
struct memory_copy_sample : cacheable_t
|
||||||
{
|
{
|
||||||
static constexpr type_identifier_t type_identifier = type_identifier_t::memory_copy;
|
static constexpr type_identifier_t type_identifier = type_identifier_t::memory_copy;
|
||||||
|
|||||||
@@ -1272,7 +1272,10 @@ shutdown()
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
data::shutdown();
|
if(data::shutdown())
|
||||||
|
{
|
||||||
|
ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down());
|
||||||
|
}
|
||||||
} catch(std::runtime_error& _e)
|
} catch(std::runtime_error& _e)
|
||||||
{
|
{
|
||||||
LOG_WARNING("Exception thrown when shutting down amd-smi: {}", _e.what());
|
LOG_WARNING("Exception thrown when shutting down amd-smi: {}", _e.what());
|
||||||
|
|||||||
+1
-22
@@ -24,6 +24,7 @@
|
|||||||
#include "core/common.hpp"
|
#include "core/common.hpp"
|
||||||
#include "core/config.hpp"
|
#include "core/config.hpp"
|
||||||
#include "core/state.hpp"
|
#include "core/state.hpp"
|
||||||
|
#include "core/timemory.hpp"
|
||||||
#include "library/runtime.hpp"
|
#include "library/runtime.hpp"
|
||||||
|
|
||||||
#include <timemory/backends/threading.hpp>
|
#include <timemory/backends/threading.hpp>
|
||||||
@@ -33,9 +34,7 @@
|
|||||||
#include "logger/debug.hpp"
|
#include "logger/debug.hpp"
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdio>
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
namespace rocprofsys
|
namespace rocprofsys
|
||||||
{
|
{
|
||||||
@@ -89,26 +88,6 @@ void
|
|||||||
exit_gotcha::operator()(const gotcha_data& _data, exit_func_t _func, int _ec) const
|
exit_gotcha::operator()(const gotcha_data& _data, exit_func_t _func, int _ec) const
|
||||||
{
|
{
|
||||||
_exit_info = { true, _data.tool_id.find("quick") != std::string::npos, _ec };
|
_exit_info = { true, _data.tool_id.find("quick") != std::string::npos, _ec };
|
||||||
|
|
||||||
if(config::get_use_amd_smi())
|
|
||||||
{
|
|
||||||
threading::clear_callbacks();
|
|
||||||
|
|
||||||
if(get_state() < ::rocprofsys::State::Finalized && !is_child_process())
|
|
||||||
{
|
|
||||||
LOG_DEBUG("Finalizing {} before calling {}({})...", get_exe_name(),
|
|
||||||
_data.tool_id, _ec);
|
|
||||||
|
|
||||||
rocprofsys_finalize();
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_DEBUG("Calling _exit({}) in {} to avoid AMD SMI cleanup issues...", _ec,
|
|
||||||
get_exe_name().c_str());
|
|
||||||
|
|
||||||
std::fflush(nullptr);
|
|
||||||
_exit(_ec);
|
|
||||||
}
|
|
||||||
|
|
||||||
invoke_exit_gotcha(_data, _func, _ec);
|
invoke_exit_gotcha(_data, _func, _ec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -567,6 +567,18 @@ get_mem_alloc_address(
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
uint64_t
|
||||||
|
get_scratch_mem_alloc_size(
|
||||||
|
[[maybe_unused]] const rocprofiler_buffer_tracing_scratch_memory_record_t& record)
|
||||||
|
{
|
||||||
|
// Scratch memory samples from SDK versions prior to 7.0.2 do not include allocation_size
|
||||||
|
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFSYS_ROCM_VERSION >= 70002)
|
||||||
|
return record.allocation_size;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
cache_region(const rocprofiler_callback_tracing_record_t* record,
|
cache_region(const rocprofiler_callback_tracing_record_t* record,
|
||||||
const rocprofiler_timestamp_t start_timestamp,
|
const rocprofiler_timestamp_t start_timestamp,
|
||||||
@@ -615,13 +627,26 @@ cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* recor
|
|||||||
record->dispatch_info.grid_size.z, stream_handle });
|
record->dispatch_info.grid_size.z, stream_handle });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
cache_scratch_memory(rocprofiler_buffer_tracing_scratch_memory_record_t* record,
|
||||||
|
uint64_t stream_handle)
|
||||||
|
{
|
||||||
|
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||||
|
trace_cache::get_buffer_storage().store(trace_cache::scratch_memory_sample{
|
||||||
|
record->start_timestamp, record->end_timestamp, record->thread_id,
|
||||||
|
record->agent_id.handle, record->queue_id.handle,
|
||||||
|
static_cast<int32_t>(record->kind), static_cast<int32_t>(record->operation),
|
||||||
|
static_cast<int32_t>(record->flags), get_scratch_mem_alloc_size(*record),
|
||||||
|
record->correlation_id.internal, get_parent_stack_id(record->correlation_id),
|
||||||
|
stream_handle });
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record,
|
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record,
|
||||||
uint64_t stream_handle)
|
uint64_t stream_handle)
|
||||||
{
|
{
|
||||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||||
trace_cache::get_buffer_storage().store(trace_cache::memory_copy_sample{
|
trace_cache::get_buffer_storage().store(trace_cache::memory_copy_sample{
|
||||||
|
|
||||||
record->start_timestamp, record->end_timestamp, record->thread_id,
|
record->start_timestamp, record->end_timestamp, record->thread_id,
|
||||||
record->dst_agent_id.handle, record->src_agent_id.handle,
|
record->dst_agent_id.handle, record->src_agent_id.handle,
|
||||||
static_cast<int32_t>(record->kind), static_cast<int32_t>(record->operation),
|
static_cast<int32_t>(record->kind), static_cast<int32_t>(record->operation),
|
||||||
@@ -1759,6 +1784,120 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
|
||||||
|
{
|
||||||
|
auto* record =
|
||||||
|
static_cast<rocprofiler_buffer_tracing_scratch_memory_record_t*>(
|
||||||
|
header->payload);
|
||||||
|
|
||||||
|
bool _group_by_queue = _default_group_by_queue;
|
||||||
|
|
||||||
|
const auto* agent = tool_data->get_gpu_tool_agent(record->agent_id);
|
||||||
|
auto device_id = static_cast<uint32_t>(agent->device_id);
|
||||||
|
|
||||||
|
const auto& t_info = thread_info::get(record->thread_id, SystemTID);
|
||||||
|
auto thread_id_sequent = t_info->index_data->sequent_value;
|
||||||
|
|
||||||
|
auto _corr_id = record->correlation_id.internal;
|
||||||
|
auto _beg_ns = record->start_timestamp;
|
||||||
|
auto _end_ns = record->end_timestamp;
|
||||||
|
auto _name =
|
||||||
|
tool_data->buffered_tracing_info.at(record->kind, record->operation);
|
||||||
|
|
||||||
|
auto _stream_id = get_stream_id(record).handle;
|
||||||
|
if(_stream_id == 0)
|
||||||
|
{
|
||||||
|
// Scratch memory event is not associated with a HIP stream
|
||||||
|
_group_by_queue = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto track_name = JOIN("", "GPU Scratch Memory [", device_id,
|
||||||
|
"] Thread ", record->thread_id);
|
||||||
|
cache_category<category::rocm_scratch_memory>();
|
||||||
|
cache_add_thread_info(record->thread_id);
|
||||||
|
cache_add_track(track_name.c_str(), record->thread_id);
|
||||||
|
cache_scratch_memory(record, _stream_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(get_use_timemory())
|
||||||
|
{
|
||||||
|
auto _bundle = kernel_dispatch_bundle_t{ _name };
|
||||||
|
|
||||||
|
_bundle.push(thread_id_sequent).start().stop();
|
||||||
|
_bundle.get([_beg_ns, _end_ns](tim::component::wall_clock* _wc) {
|
||||||
|
_wc->set_value(_end_ns - _beg_ns);
|
||||||
|
_wc->set_accum(_end_ns - _beg_ns);
|
||||||
|
});
|
||||||
|
_bundle.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
if(get_use_perfetto())
|
||||||
|
{
|
||||||
|
// Scratch memory samples from SDK versions prior to 7.0.2 do not include
|
||||||
|
// allocation_size field, so counter tracks are not needed
|
||||||
|
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFSYS_ROCM_VERSION >= 70002)
|
||||||
|
using counter_track = perfetto_counter_track<
|
||||||
|
rocprofiler_buffer_tracing_scratch_memory_record_t>;
|
||||||
|
|
||||||
|
if(!counter_track::exists(device_id))
|
||||||
|
{
|
||||||
|
auto track_name_alloc_size =
|
||||||
|
JOIN("", "GPU Scratch Memory [", device_id, "] (S) Thread ",
|
||||||
|
thread_id_sequent);
|
||||||
|
counter_track::emplace(device_id, track_name_alloc_size, "bytes");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(record->operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC)
|
||||||
|
{
|
||||||
|
TRACE_COUNTER("rocm_scratch_memory",
|
||||||
|
counter_track::at(device_id, 0), _beg_ns,
|
||||||
|
record->allocation_size);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
auto add_perfetto_annotations = [&](::perfetto::EventContext ctx) {
|
||||||
|
if(config::get_perfetto_annotations())
|
||||||
|
{
|
||||||
|
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ns);
|
||||||
|
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns);
|
||||||
|
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
|
||||||
|
tracing::add_perfetto_annotation(ctx, "stream_id",
|
||||||
|
_stream_id);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if(_group_by_queue)
|
||||||
|
{
|
||||||
|
auto track_name_events = [&]() {
|
||||||
|
return JOIN("", "GPU Scratch Memory (S) Events Thread ",
|
||||||
|
thread_id_sequent);
|
||||||
|
};
|
||||||
|
const auto _track = tracing::get_perfetto_track(
|
||||||
|
category::rocm_scratch_memory{}, track_name_events);
|
||||||
|
|
||||||
|
tracing::push_perfetto(category::rocm_scratch_memory{},
|
||||||
|
_name.data(), _track, _beg_ns,
|
||||||
|
::perfetto::Flow::ProcessScoped(_corr_id),
|
||||||
|
add_perfetto_annotations);
|
||||||
|
|
||||||
|
tracing::pop_perfetto(category::rocm_scratch_memory{}, "", _track,
|
||||||
|
_end_ns);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const auto _track = tracing::get_perfetto_track(
|
||||||
|
category::rocm_hip_stream{}, _track_desc_stream, _stream_id);
|
||||||
|
|
||||||
|
tracing::push_perfetto(category::rocm_hip_stream{}, _name.data(),
|
||||||
|
_track, _beg_ns,
|
||||||
|
::perfetto::Flow::ProcessScoped(_corr_id),
|
||||||
|
add_perfetto_annotations);
|
||||||
|
|
||||||
|
tracing::pop_perfetto(category::rocm_hip_stream{}, "", _track,
|
||||||
|
_end_ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
|
||||||
{
|
{
|
||||||
auto* record =
|
auto* record =
|
||||||
@@ -2249,6 +2388,17 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
|||||||
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0,
|
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0,
|
||||||
_data->memory_copy_buffer));
|
_data->memory_copy_buffer));
|
||||||
}
|
}
|
||||||
|
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) > 0)
|
||||||
|
{
|
||||||
|
ROCPROFILER_CALL(rocprofiler_create_buffer(
|
||||||
|
_data->primary_ctx, buffer_size, watermark,
|
||||||
|
ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data,
|
||||||
|
&_data->scratch_memory_buffer));
|
||||||
|
|
||||||
|
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
||||||
|
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0,
|
||||||
|
_data->scratch_memory_buffer));
|
||||||
|
}
|
||||||
|
|
||||||
#if(ROCPROFILER_VERSION >= 600)
|
#if(ROCPROFILER_VERSION >= 600)
|
||||||
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0)
|
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0)
|
||||||
|
|||||||
@@ -122,7 +122,7 @@ using backtrace_operation_map_t =
|
|||||||
|
|
||||||
struct client_data
|
struct client_data
|
||||||
{
|
{
|
||||||
static constexpr size_t num_buffers = 4;
|
static constexpr size_t num_buffers = 5;
|
||||||
static constexpr size_t num_contexts = 2;
|
static constexpr size_t num_contexts = 2;
|
||||||
|
|
||||||
using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t<std::string_view>;
|
using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t<std::string_view>;
|
||||||
@@ -138,6 +138,7 @@ struct client_data
|
|||||||
rocprofiler_context_id_t primary_ctx = { 0 };
|
rocprofiler_context_id_t primary_ctx = { 0 };
|
||||||
rocprofiler_context_id_t counter_ctx = { 0 };
|
rocprofiler_context_id_t counter_ctx = { 0 };
|
||||||
rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 };
|
rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 };
|
||||||
|
rocprofiler_buffer_id_t scratch_memory_buffer = { 0 };
|
||||||
rocprofiler_buffer_id_t memory_copy_buffer = { 0 };
|
rocprofiler_buffer_id_t memory_copy_buffer = { 0 };
|
||||||
rocprofiler_buffer_id_t memory_alloc_buffer = { 0 };
|
rocprofiler_buffer_id_t memory_alloc_buffer = { 0 };
|
||||||
rocprofiler_buffer_id_t counter_collection_buffer = { 0 };
|
rocprofiler_buffer_id_t counter_collection_buffer = { 0 };
|
||||||
@@ -179,12 +180,9 @@ client_data::get_contexts() const
|
|||||||
inline client_data::buffer_id_vec_t
|
inline client_data::buffer_id_vec_t
|
||||||
client_data::get_buffers() const
|
client_data::get_buffers() const
|
||||||
{
|
{
|
||||||
return buffer_id_vec_t{
|
return buffer_id_vec_t{ kernel_dispatch_buffer, scratch_memory_buffer,
|
||||||
kernel_dispatch_buffer,
|
memory_copy_buffer, memory_alloc_buffer,
|
||||||
memory_copy_buffer,
|
counter_collection_buffer };
|
||||||
memory_alloc_buffer,
|
|
||||||
counter_collection_buffer,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const rocprofsys_agent_t*
|
inline const rocprofsys_agent_t*
|
||||||
|
|||||||
@@ -47,7 +47,6 @@ from .parser import parse_uploaded_file
|
|||||||
from .parser import find_causal_files
|
from .parser import find_causal_files
|
||||||
import plotly.graph_objects as go
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
|
||||||
file_timestamp = 0
|
file_timestamp = 0
|
||||||
global_data = pd.DataFrame()
|
global_data = pd.DataFrame()
|
||||||
global_samples = pd.DataFrame()
|
global_samples = pd.DataFrame()
|
||||||
|
|||||||
@@ -37,7 +37,6 @@ from . import libpyrocprofsys
|
|||||||
from .libpyrocprofsys.profiler import profiler_init as _profiler_init
|
from .libpyrocprofsys.profiler import profiler_init as _profiler_init
|
||||||
from .libpyrocprofsys.profiler import profiler_finalize as _profiler_fini
|
from .libpyrocprofsys.profiler import profiler_finalize as _profiler_fini
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["exec_", "_file", "_get_argv", "_initialize", "_finalize"]
|
__all__ = ["exec_", "_file", "_get_argv", "_initialize", "_finalize"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ from .libpyrocprofsys.user import pop_region
|
|||||||
from .common import _initialize
|
from .common import _initialize
|
||||||
from .common import _file
|
from .common import _file
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"region",
|
"region",
|
||||||
"Region",
|
"Region",
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Referens i nytt ärende
Block a user