Add support for hipExtStreamGetCUMask API

Change-Id: I0fa67ad581dd75556f17c7410af2c1d5cb6ae99a
Этот коммит содержится в:
Aryan Salmanpour
2020-11-03 12:24:04 -05:00
коммит произвёл Aryan Salmanpour
родитель 5e43a6defb
Коммит d9a335bccf
5 изменённых файлов: 102 добавлений и 0 удалений
+14
Просмотреть файл
@@ -1196,6 +1196,20 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority);
hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, const uint32_t* cuMask);
/**
 * @brief Get CU mask associated with an asynchronous stream
 *
 * @param[in] stream stream to be queried
 * @param[in] cuMaskSize number of 32-bit words (uint32_t) in the buffer that cuMask points to,
 * as allocated by the caller
 * @param[out] cuMask Pointer to a caller-allocated array of uint32_t in which
 * the stream's CU mask is returned. The CU mask is returned in chunks of 32 bits, where
 * each set bit represents one active CU
 * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue
 *
 * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
 */
hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask);
/**
* Stream CallBack struct
*/
+1
Просмотреть файл
@@ -266,3 +266,4 @@ hipMemcpyHtoA
hipMemcpyParam2DAsync
__gnu_h2f_ieee
__gnu_f2h_ieee
hipExtStreamGetCUMask
+1
Просмотреть файл
@@ -275,6 +275,7 @@ global:
hipMemcpyHtoA;
hipMemcpyParam2DAsync;
__hipGetPCH;
hipExtStreamGetCUMask;
};
local:
*;
+2
Просмотреть файл
@@ -156,6 +156,8 @@ namespace hip {
unsigned int Flags() const { return flags_; }
/// Returns the priority for the current stream (the value held in priority_).
/// Const accessor: it does not modify the stream.
Priority GetPriority() const { return priority_; }
/// Returns the CU mask for the current stream
const std::vector<uint32_t> GetCUMask() const { return cuMask_; }
/// Sync all non-blocking streams.
/// Declared static, so it is invoked without a particular Stream instance.
static void syncNonBlockingStreams();
+84
Просмотреть файл
@@ -396,3 +396,87 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
HIP_RETURN(hipSuccess);
}
// ================================================================================================
/// Returns the compute-unit (CU) mask that applies to a stream.
///
/// The mask is written to cuMask as an array of 32-bit words in which every set bit stands
/// for one active CU.
///
/// @param stream      Stream to query. nullptr selects the "no stream mask" case, for which
///                    the device's global CU mask is returned if one is defined, otherwise a
///                    mask with one bit set per compute unit of the device.
/// @param cuMaskSize  Number of uint32_t words available in cuMask. Must be at least
///                    ceil(maxComputeUnits / 32).
/// @param cuMask      Caller-allocated output buffer of cuMaskSize words. Words beyond the
///                    end of the resulting mask are set to zero.
/// @return hipSuccess on success; hipErrorInvalidValue if cuMask is nullptr or cuMaskSize is
///         too small for the device.
///
/// NOTE(review): the stream handle is cast to hip::Stream* without validation, so this
/// function never reports hipErrorInvalidHandle - confirm whether a validity check should be
/// added here.
hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask) {
  HIP_INIT_API(hipExtStreamGetCUMask, stream, cuMaskSize, cuMask);

  if (cuMask == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  int deviceId = hip::getCurrentDevice()->deviceId();
  auto* deviceHandle = g_devices[deviceId]->devices()[0];
  const auto& info = deviceHandle->info();

  // Minimum number of 32-bit words needed to hold one bit per compute unit; a smaller
  // caller buffer is rejected.
  const uint32_t numCUs = info.maxComputeUnits_;
  const uint32_t fullWords = numCUs / 32;
  const uint32_t tailBits = numCUs % 32;
  const uint32_t cuMaskSizeRequired = fullWords + ((tailBits != 0) ? 1 : 0);
  if (cuMaskSize < cuMaskSizeRequired) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // Default mask with every CU active: fully populated words followed by one partial word.
  // Built directly (instead of shifting bit by bit) so that no shift by 32 is ever
  // evaluated, which would be undefined on targets where unsigned long is 32 bits wide.
  std::vector<uint32_t> defaultCUMask(fullWords, 0xFFFFFFFFu);
  if (tailBits != 0) {
    defaultCUMask.push_back((uint32_t{1} << tailBits) - 1u);
  }

  // The mask every result is derived from: the global CU mask when one is defined,
  // otherwise the all-CUs-active default.
  const std::vector<uint32_t>& baseCUMask =
      info.globalCUMask_.empty() ? defaultCUMask : info.globalCUMask_;

  std::vector<uint32_t> mask;
  if (stream == nullptr) {
    // No stream: report the base mask unchanged.
    mask = baseCUMask;
  } else {
    // Stream given: report (stream mask AND base mask) over the words both masks have.
    const std::vector<uint32_t> streamCUMask =
        reinterpret_cast<hip::Stream*>(stream)->GetCUMask();
    const size_t commonWords = std::min(streamCUMask.size(), baseCUMask.size());
    mask.reserve(commonWords);
    for (size_t i = 0; i < commonWords; ++i) {
      mask.push_back(streamCUMask[i] & baseCUMask[i]);
    }

    // If the stream has no custom mask, or the AND leaves no CU active, fall back to the
    // base mask. This check and the copy below apply to BOTH the global-mask and the
    // default-mask case; previously they ran only when no global mask was defined, so with
    // a global mask set the output buffer was never written.
    const bool zeroCUMask =
        std::all_of(mask.begin(), mask.end(), [](uint32_t word) { return word == 0; });
    if (zeroCUMask) {
      mask = baseCUMask;
    }
  }

  // Never write past the caller's buffer, and clear the words the mask does not cover so
  // the whole buffer has a defined value.
  const size_t wordsToCopy = std::min<size_t>(mask.size(), cuMaskSize);
  std::copy_n(mask.begin(), wordsToCopy, cuMask);
  std::fill_n(cuMask + wordsToCopy, cuMaskSize - wordsToCopy, 0u);

  HIP_RETURN(hipSuccess);
}