Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging
Cette révision appartient à :
@@ -45,6 +45,20 @@ Verify your can find hipconfig (one of the hip tools in bin dir):
|
||||
```
|
||||
> hipconfig -pn
|
||||
/home/me/HIP
|
||||
|
||||
|
||||
### Using HIP with the AMD Native-GCN compiler.
|
||||
AMD recently released a direct-to-GCN-ISA target. This compiler generates GCN ISA directly from LLVM, without going through an intermediate compiler
|
||||
IR such as HSAIL or PTX.
|
||||
The native GCN target is included with upstream LLVM, and has also been integrated with HCC compiler and can be used to compiler HIP programs for AMD.
|
||||
Here's how to use it with HIP:
|
||||
|
||||
- Follow the instructions here to compile the HCC and native LLVM compiler:
|
||||
> https://github.com/RadeonOpenCompute/HCC-Native-GCN-ISA/wiki
|
||||
> (In the make step for HCC, we recommend setting -DCMAKE_INSTALL_PREFIX=/opt/hcc-native)
|
||||
|
||||
Set HCC_HOME environment variable before compiling HIP program to point to the native compiler:
|
||||
> export HCC_HOME=/opt/hcc-native
|
||||
```
|
||||
|
||||
## Examples and Getting Started:
|
||||
|
||||
+37
-3
@@ -15,6 +15,7 @@ GetOptions(
|
||||
,"platform|P" => \$p_platform
|
||||
,"cpp_config|cxx_config|C" => \$p_cpp_config
|
||||
,"full|f|info" => \$p_full,
|
||||
,"check" => \$p_check,
|
||||
,"newline|n" => \$p_newline
|
||||
);
|
||||
|
||||
@@ -25,6 +26,7 @@ if ($p_help) {
|
||||
print " --compiler, -c : print compiler (hcc or nvcc)\n";
|
||||
print " --platform, -P : print platform (hcc or nvcc)\n";
|
||||
print " --full, -f : print full config\n";
|
||||
print " --check : check configuration\n";
|
||||
print " --newline, -n : print newline\n";
|
||||
print " --help, -h : print help message\n";
|
||||
exit();
|
||||
@@ -37,6 +39,9 @@ $CUDA_PATH='/usr/local/cuda' unless defined $CUDA_PATH;
|
||||
$HCC_HOME=$ENV{'HCC_HOME'};
|
||||
$HCC_HOME='/opt/hcc' unless defined $HCC_HOME;
|
||||
|
||||
$HSA_PATH=$ENV{'HSA_PATH'};
|
||||
$HSA_PATH='/opt/hsa' unless defined $HSA_PATH;
|
||||
|
||||
#---
|
||||
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
|
||||
$HIP_PLATFORM=$ENV{'HIP_PLATFORM'};
|
||||
@@ -67,7 +72,15 @@ if ($p_cpp_config) {
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
if ($p_full) {
|
||||
|
||||
if ($p_compiler or $p_platform) {
|
||||
print $HIP_PLATFORM;
|
||||
$printed = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (!$printed or $p_full) {
|
||||
print "== hipconfig\n";
|
||||
print "HIP_PATH : ", $HIP_PATH, "\n";
|
||||
print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n";
|
||||
@@ -76,6 +89,7 @@ if ($p_full) {
|
||||
{
|
||||
print "\n" ;
|
||||
print "== hcc\n";
|
||||
print ("HSA_PATH : $HSA_PATH\n");
|
||||
print ("HCC_HOME : $HCC_HOME\n");
|
||||
system("$HCC_HOME/bin/hcc --version");
|
||||
print ("HCC-cxxflags: ");
|
||||
@@ -107,10 +121,30 @@ if ($p_full) {
|
||||
}
|
||||
|
||||
|
||||
if (!$printed or $p_compiler or $p_platform) {
|
||||
print $HIP_PLATFORM;
|
||||
if ($p_check) {
|
||||
print "\nCheck system installation:\n";
|
||||
|
||||
printf ("%-70s", "check hipconfig in PATH...");
|
||||
if (system ("hipconfig > /dev/null 2>&1") != 0) {
|
||||
print "FAIL\n";
|
||||
} else {
|
||||
printf "good\n";
|
||||
}
|
||||
|
||||
if ($HIP_PLATFORM eq "hcc") {
|
||||
$LD_LIBRARY_PATH=$ENV{'LD_LIBRARY_PATH'};
|
||||
printf("%-70s", "check LD_LIBRARY_PATH ($LD_LIBRARY_PATH) contains HSA_PATH ($HSA_PATH)...");
|
||||
if (index($LD_LIBRARY_PATH, $HSA_PATH) == -1) {
|
||||
print "FAIL\n";
|
||||
} else {
|
||||
printf "good\n";
|
||||
}
|
||||
|
||||
# TODO - check hipcc / nvcc found and executable.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ($p_newline) {
|
||||
print "\n";
|
||||
}
|
||||
|
||||
@@ -115,7 +115,7 @@ directory names.
|
||||
|
||||
|
||||
```shell
|
||||
> hipconverinplace.sh MY_SRC_DIR
|
||||
> hipconvertinplace.sh MY_SRC_DIR
|
||||
```
|
||||
|
||||
|
||||
|
||||
+28
-16
@@ -46,7 +46,7 @@ THE SOFTWARE.
|
||||
|
||||
|
||||
#define USE_AM_TRACKER 1 /* >0 = use new AM memory tracker features. */
|
||||
#define USE_ROCR_V2 0 /* use the ROCR v2 async copy API with dst and src agents */
|
||||
#define USE_ROCR_V2 1 /* use the ROCR v2 async copy API with dst and src agents */
|
||||
|
||||
#if (USE_AM_TRACKER) and (__hcc_workweek__ < 16074)
|
||||
#error (USE_AM_TRACKER requries HCC version of 16074 or newer)
|
||||
@@ -498,9 +498,14 @@ inline bool ihipStream_t::preKernelCommand()
|
||||
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel], _last_copy_signal->_sig_id)
|
||||
|
||||
} else {
|
||||
tprintf (TRACE_SYNC, "stream %p switch %s to %s (wait for previous...)\n",
|
||||
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
|
||||
this->waitAndReclaimOlder(_last_copy_signal);
|
||||
if (HIP_DISABLE_HW_KERNEL_DEP != -1) {
|
||||
tprintf (TRACE_SYNC, "stream %p switch %s to %s (wait for previous...)\n",
|
||||
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
|
||||
this->waitAndReclaimOlder(_last_copy_signal);
|
||||
} else {
|
||||
tprintf (TRACE_SYNC, "stream %p switch %s to %s (IGNORE dependency)\n",
|
||||
this, ihipCommandName[_last_command_type], ihipCommandName[ihipCommandKernel]);
|
||||
}
|
||||
}
|
||||
}
|
||||
_last_command_type = ihipCommandKernel;
|
||||
@@ -548,9 +553,15 @@ inline int ihipStream_t::copyCommand(ihipSignal_t *lastCopy, hsa_signal_t *waitS
|
||||
}
|
||||
|
||||
if (HIP_DISABLE_HW_COPY_DEP && needSync) {
|
||||
// do the wait here on the host, and disable the device-side command resolution.
|
||||
hsa_signal_wait_acquire(*waitSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
needSync = 0;
|
||||
if (HIP_DISABLE_HW_COPY_DEP == -1) {
|
||||
tprintf (TRACE_SYNC, "IGNORE copy dependency\n")
|
||||
|
||||
} else {
|
||||
tprintf (TRACE_SYNC, "HOST-wait for copy dependency\n")
|
||||
// do the wait here on the host, and disable the device-side command resolution.
|
||||
hsa_signal_wait_acquire(*waitSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
needSync = 0;
|
||||
}
|
||||
}
|
||||
|
||||
_last_command_type = copyType;
|
||||
@@ -937,8 +948,8 @@ void ihipInit()
|
||||
READ_ENV_I(release, HIP_STREAM_SIGNALS, 0, "Number of signals to allocate when new stream is created (signal pool will grow on demand)");
|
||||
READ_ENV_I(release, HIP_VISIBLE_DEVICES, CUDA_VISIBLE_DEVICES, "Only devices whose index is present in the secquence are visible to HIP applications and they are enumerated in the order of secquence" );
|
||||
|
||||
READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host.");
|
||||
READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host.");
|
||||
READ_ENV_I(release, HIP_DISABLE_HW_KERNEL_DEP, 0, "Disable HW dependencies before kernel commands - instead wait for dependency on host. -1 means ignore these dependencies. (debug mode)");
|
||||
READ_ENV_I(release, HIP_DISABLE_HW_COPY_DEP, 0, "Disable HW dependencies before copy commands - instead wait for dependency on host. -1 means ifnore these dependencies (debug mode)");
|
||||
READ_ENV_I(release, HIP_DISABLE_BIDIR_MEMCPY, 0, "Disable simultaneous H2D memcpy and D2H memcpy to same device");
|
||||
READ_ENV_I(release, HIP_ONESHOT_COPY_DEP, 0, "If set, only set the copy input dependency for the first copy command in a staged copy. If clear, set the dep for each copy.");
|
||||
|
||||
@@ -2262,15 +2273,14 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
|
||||
}
|
||||
}
|
||||
|
||||
hsa_signal_t depSignal;
|
||||
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D);
|
||||
|
||||
if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) {
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
std::lock_guard<std::mutex> l (device->_copy_lock[0]);
|
||||
//printf ("staged-copy- read dep signals\n");
|
||||
|
||||
hsa_signal_t depSignal;
|
||||
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyH2D);
|
||||
|
||||
if (HIP_PININPLACE) {
|
||||
device->_staging_buffer[0]->CopyHostToDevicePinInPlace(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
} else {
|
||||
@@ -2287,14 +2297,17 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
|
||||
if (HIP_STAGING_BUFFERS) {
|
||||
std::lock_guard<std::mutex> l (device->_copy_lock[HIP_DISABLE_BIDIR_MEMCPY ? 0:1]);
|
||||
//printf ("staged-copy- read dep signals\n");
|
||||
hsa_signal_t depSignal;
|
||||
int depSignalCnt = stream->copyCommand(NULL, &depSignal, ihipCommandCopyD2H);
|
||||
device->_staging_buffer[1]->CopyDeviceToHost(dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
} else {
|
||||
// TODO - remove, slow path.
|
||||
hc::am_copy(dst, src, sizeBytes);
|
||||
}
|
||||
} else if (kind == hipMemcpyHostToHost) { // TODO-refactor.
|
||||
|
||||
if (depSignalCnt) {
|
||||
// host waits before doing host memory copy.
|
||||
hsa_signal_wait_acquire(depSignal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE);
|
||||
}
|
||||
memcpy(dst, src, sizeBytes);
|
||||
|
||||
} else {
|
||||
@@ -2315,8 +2328,6 @@ void ihipSyncCopy(ihipStream_t *stream, void* dst, const void* src, size_t sizeB
|
||||
|
||||
|
||||
#if USE_ROCR_V2
|
||||
hsa_signal_t depSignal;
|
||||
int depSignalCnt = stream->copyCommand(NULL, &depSignal, copyType);
|
||||
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, device->_hsa_agent, src, device->_hsa_agent, sizeBytes, depSignalCnt, depSignalCnt ? &depSignal:0x0, device->_copy_signal);
|
||||
#else
|
||||
hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, device->_copy_signal);
|
||||
@@ -2370,6 +2381,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
|
||||
/**
|
||||
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue
|
||||
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
|
||||
* @warning on HCC hipMemcpyAsync requires that any host pointers are pinned (ie via the hipMallocHost call).
|
||||
*/
|
||||
//---
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur