SWDEV-1 - Delete sample 20_hip_vulkan

- Delete sample 20_hip_vulkan as it contains NVIDIA Copyright

Change-Id: If9638a121bdfcf08813d3ea9eb4a14f78170c2a6


[ROCm/hip-tests commit: 6ce6b54085]
This commit is contained in:
Rakesh Roy
2024-03-08 12:22:12 +05:30
committed by Rakesh Roy
orang tua 291ee99ba2
melakukan 96db0dbd8d
11 mengubah file dengan 0 tambahan dan 3297 penghapusan
@@ -1,99 +0,0 @@
# Copyright (c) 2020 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# hipcc.bat fails to qualify as a valid compiler for CMAKE_CXX_COMPILER_ID = ROCMClang
# so the simple compiler test is skipped and forced to use hipcc.bat as compiler
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
set(CMAKE_CXX_STANDARD 14)
project(hipVulkan)
cmake_minimum_required(VERSION 3.10)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake;${CMAKE_MODULE_PATH}")
if(UNIX)
if(NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
endif()
# need to set rocm_path for windows
# since clang and hip are two different folders during build/install step
if (WIN32 AND HIPINFO_INTERNAL_BUILD)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --rocm-path=${CMAKE_PREFIX_PATH}")
endif()
# Find hip
find_package(hip REQUIRED)
if (WIN32)
find_package(GLFW3)
if(NOT GLFW_FOUND)
if(EXISTS "${GLFW_PATH}")
message(STATUS "FOUND GLFW SDK: ${GLFW_PATH}")
elseif (EXISTS "$ENV{GLFW_PATH}")
message(STATUS "FOUND GLFW SDK: $ENV{GLFW_PATH}")
set(GLFW_PATH $ENV{GLFW_PATH})
else()
message("Error: Unable to locate GLFW SDK. please specify GLFW_PATH")
return()
endif()
endif()
endif(WIN32)
find_package(Vulkan)
if(NOT Vulkan_FOUND)
if(EXISTS "${VULKAN_PATH}")
message(STATUS "Vulkan SDK: ${VULKAN_PATH}")
elseif (EXISTS "$ENV{VULKAN_SDK}")
message(STATUS "FOUND VULKAN SDK: $ENV{VULKAN_SDK}")
set(VULKAN_PATH $ENV{VULKAN_SDK})
else()
message("Error: Unable to locate Vulkan SDK. please specify VULKAN_PATH")
return()
endif()
endif()
set(VULKAN_PATH ${Vulkan_INCLUDE_DIRS})
STRING(REGEX REPLACE "/[Ii]nclude" "" VULKAN_PATH ${VULKAN_PATH})
# Include Vulkan header files from Vulkan SDK
include_directories(AFTER ${VULKAN_PATH}/include)
link_directories(${VULKAN_PATH}/bin;${VULKAN_PATH}/lib;)
link_directories(${GLFW_PATH}/lib-vc2019)
# Set compiler and linker
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_BUILD_TYPE Release)
# Create the excutable
add_executable(hipVulkan VulkanBaseApp.cpp VulkanBaseApp.h main.cpp SineWaveSimulation.cpp SineWaveSimulation.h linmath.h)
include_directories(${CMAKE_PREFIX_PATH}/include)
include_directories(${GLFW_PATH}/include)
# Link with HIP
if (WIN32)
target_link_libraries(hipVulkan advapi32 hip::host vulkan-1 glfw3dll)
else (WIN32)
target_link_libraries(hipVulkan hip::host vulkan glfw)
endif (WIN32)
if(TARGET build_cookbook)
add_dependencies(build_cookbook hipVulkan)
endif()
@@ -1,147 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Modifications Copyright (C)2021 Advanced
* Micro Devices, Inc. All rights reserved.
*/
#include "SineWaveSimulation.h"
#include <algorithm>
//#include <helper_cuda.h>
#include "hip/hip_runtime.h"
__global__ void sinewave(float *heightMap, unsigned int width, unsigned int height, float time)
{
const float freq = 4.0f;
const size_t stride = gridDim.x * blockDim.x;
// Iterate through the entire array in a way that is
// independent of the grid configuration
for (size_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < width * height; tid += stride) {
// Calculate the x, y coordinates
const size_t y = tid / width;
const size_t x = tid - y * width;
// Normalize x, y to [0,1]
const float u = ((2.0f * x) / width) - 1.0f;
const float v = ((2.0f * y) / height) - 1.0f;
// Calculate the new height value
const float w = 0.5f * sinf(u * freq + time) * cosf(v * freq + time);
// Store this new height value
heightMap[tid] = w;
}
}
SineWaveSimulation::SineWaveSimulation(size_t width, size_t height)
: m_heightMap(nullptr), m_width(width), m_height(height)
{
}
void SineWaveSimulation::initCudaLaunchConfig(int device)
{
hipDeviceProp_t prop = {};
checkHIPErrors(hipSetDevice(device));
checkHIPErrors(hipGetDeviceProperties(&prop, device));
// We don't need large block sizes, since there's not much inter-thread communication
m_threads = prop.warpSize;
// Use the occupancy calculator and fill the gpu as best as we can
checkHIPErrors(hipOccupancyMaxActiveBlocksPerMultiprocessor(&m_blocks, sinewave, prop.warpSize, 0));
m_blocks *= prop.multiProcessorCount;
// Go ahead and the clamp the blocks to the minimum needed for this height/width
m_blocks = std::min(m_blocks, (int)((m_width * m_height + m_threads - 1) / m_threads));
}
int SineWaveSimulation::initCuda(uint8_t *vkDeviceUUID, size_t UUID_SIZE)
{
int current_device = 0;
int device_count = 0;
int devices_prohibited = 0;
hipDeviceProp_t deviceProp;
checkHIPErrors(hipGetDeviceCount(&device_count));
if (device_count == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the GPU which is selected by Vulkan
while (current_device < device_count) {
hipGetDeviceProperties(&deviceProp, current_device);
if ((deviceProp.computeMode != hipComputeModeProhibited)) {
// Compare the cuda device UUID with vulkan UUID
// FIXME
int ret = 0; // memcmp((void*)&deviceProp.uuid, vkDeviceUUID, UUID_SIZE);
if (ret == 0)
{
checkHIPErrors(hipSetDevice(current_device));
checkHIPErrors(hipGetDeviceProperties(&deviceProp, current_device));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
current_device, deviceProp.name, deviceProp.major,
deviceProp.minor);
return current_device;
}
} else {
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count) {
fprintf(stderr,
"HIP error:"
" No Vulkan-HIP Interop capable GPU found.\n");
exit(EXIT_FAILURE);
}
return -1;
}
SineWaveSimulation::~SineWaveSimulation()
{
m_heightMap = NULL;
}
void SineWaveSimulation::initSimulation(float *heights)
{
m_heightMap = heights;
}
void SineWaveSimulation::stepSimulation(float time, hipStream_t stream)
{
hipLaunchKernelGGL(sinewave, dim3(m_blocks), dim3(m_threads), 0, stream , m_heightMap, m_width, m_height, time);
getLastHIPError("Failed to launch CUDA simulation");
//hipStreamSynchronize(stream);
}
@@ -1,101 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Modifications Copyright (C)2021 Advanced
* Micro Devices, Inc. All rights reserved.
*/
#pragma once
#ifndef __SINESIM_H__
#define __SINESIM_H__
#include <vector>
#include <hip/hip_runtime_api.h>
#include <stdint.h>
#include "linmath.h"
class SineWaveSimulation
{
float *m_heightMap;
size_t m_width, m_height;
int m_blocks, m_threads;
public:
SineWaveSimulation(size_t width, size_t height);
~SineWaveSimulation();
void initSimulation(float *heightMap);
void stepSimulation(float time, hipStream_t stream = 0);
void initCudaLaunchConfig(int device);
int initCuda(uint8_t *vkDeviceUUID, size_t UUID_SIZE);
size_t getWidth() const {
return m_width;
}
size_t getHeight() const {
return m_height;
}
};
template <typename T>
void check(T result, char const* const func, const char* const file,
int const line) {
if (result) {
fprintf(stderr, "HIP error at %s:%d code=%d \"%s\" \n", file, line, static_cast<unsigned int>(result), func);
// static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
exit(EXIT_FAILURE);
}
}
// This will output the proper CUDA error strings in the event
// that a CUDA host call returns an error
#define checkHIPErrors(val) check((val), #val, __FILE__, __LINE__)
// This will output the proper error string when calling cudaGetLastError
#define getLastHIPError(msg) __getLastHIPError(msg, __FILE__, __LINE__)
inline void __getLastHIPError(const char* errorMessage, const char* file,
const int line) {
hipError_t err = hipGetLastError();
if (hipSuccess != err) {
fprintf(stderr,
"%s(%i) : getLastHIPError() HIP error :"
" %s : (%d) %s.\n",
file, line, errorMessage, static_cast<int>(err),
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
}
#ifndef MAX
#define MAX(a, b) (a > b ? a : b)
#endif
#endif // __SINESIM_H__
@@ -1,146 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Modifications Copyright (C)2021 Advanced
* Micro Devices, Inc. All rights reserved.
*/
#pragma once
#ifndef __VULKANBASEAPP_H__
#define __VULKANBASEAPP_H__
#include <string>
#include <vector>
#include <vulkan/vulkan.h>
#ifdef _WIN64
#define NOMINMAX
#include <windows.h>
#include <vulkan/vulkan_win32.h>
#endif /* _WIN64 */
struct GLFWwindow;
class VulkanBaseApp
{
public:
VulkanBaseApp(const std::string& appName, bool enableValidation = false);
static VkExternalSemaphoreHandleTypeFlagBits getDefaultSemaphoreHandleType();
static VkExternalMemoryHandleTypeFlagBits getDefaultMemHandleType();
virtual ~VulkanBaseApp();
void init();
void *getMemHandle(VkDeviceMemory memory, VkExternalMemoryHandleTypeFlagBits handleType);
void *getSemaphoreHandle(VkSemaphore semaphore, VkExternalSemaphoreHandleTypeFlagBits handleType);
void createExternalSemaphore(VkSemaphore& semaphore, VkExternalSemaphoreHandleTypeFlagBits handleType);
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory);
void createExternalBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkExternalMemoryHandleTypeFlagsKHR extMemHandleType, VkBuffer& buffer, VkDeviceMemory& bufferMemory);
void importExternalBuffer(void *handle, VkExternalMemoryHandleTypeFlagBits handleType, size_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& memory);
void copyBuffer(VkBuffer dst, VkBuffer src, VkDeviceSize size);
VkCommandBuffer beginSingleTimeCommands();
void endSingleTimeCommands(VkCommandBuffer commandBuffer);
void mainLoop();
protected:
const std::string m_appName;
const bool m_enableValidation;
VkInstance m_instance;
VkDebugUtilsMessengerEXT m_debugMessenger;
VkSurfaceKHR m_surface;
VkPhysicalDevice m_physicalDevice;
VkDevice m_device;
VkQueue m_graphicsQueue;
VkQueue m_presentQueue;
VkSwapchainKHR m_swapChain;
std::vector<VkImage> m_swapChainImages;
VkFormat m_swapChainFormat;
VkExtent2D m_swapChainExtent;
std::vector<VkImageView> m_swapChainImageViews;
std::vector<std::pair<VkShaderStageFlagBits, std::string> > m_shaderFiles;
VkRenderPass m_renderPass;
VkPipelineLayout m_pipelineLayout;
VkPipeline m_graphicsPipeline;
std::vector<VkFramebuffer> m_swapChainFramebuffers;
VkCommandPool m_commandPool;
std::vector<VkCommandBuffer> m_commandBuffers;
std::vector<VkSemaphore> m_imageAvailableSemaphores;
std::vector<VkSemaphore> m_renderFinishedSemaphores;
std::vector<VkFence> m_inFlightFences;
std::vector<VkBuffer> m_uniformBuffers;
std::vector<VkDeviceMemory> m_uniformMemory;
VkDescriptorSetLayout m_descriptorSetLayout;
VkDescriptorPool m_descriptorPool;
std::vector<VkDescriptorSet> m_descriptorSets;
VkImage m_depthImage;
VkDeviceMemory m_depthImageMemory;
VkImageView m_depthImageView;
size_t m_currentFrame;
bool m_framebufferResized;
uint8_t m_vkDeviceUUID[VK_UUID_SIZE];
virtual void initVulkanApp() {}
virtual void fillRenderingCommandBuffer(VkCommandBuffer& buffer) {}
virtual std::vector<const char *> getRequiredExtensions() const;
virtual std::vector<const char *> getRequiredDeviceExtensions() const;
virtual void getVertexDescriptions(std::vector<VkVertexInputBindingDescription>& bindingDesc, std::vector<VkVertexInputAttributeDescription>& attribDesc);
virtual void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo& info);
virtual void getWaitFrameSemaphores(std::vector<VkSemaphore>& wait, std::vector< VkPipelineStageFlags>& waitStages) const;
virtual void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const;
virtual VkDeviceSize getUniformSize() const;
virtual void updateUniformBuffer(uint32_t imageIndex);
virtual void drawFrame();
private:
GLFWwindow *m_window;
void initWindow();
void initVulkan();
void createInstance();
void createSurface();
void createDevice();
void createSwapChain();
void createImageViews();
void createRenderPass();
void createDescriptorSetLayout();
void createGraphicsPipeline();
void createFramebuffers();
void createCommandPool();
void createDepthResources();
void createUniformBuffers();
void createDescriptorPool();
void createDescriptorSets();
void createCommandBuffers();
void createSyncObjects();
void cleanupSwapChain();
void recreateSwapChain();
bool isSuitableDevice(VkPhysicalDevice dev) const;
static void resizeCallback(GLFWwindow *window, int width, int height);
};
void readFile(std::istream& s, std::vector<char>& data);
#endif /* __VULKANBASEAPP_H__ */
@@ -1,30 +0,0 @@
Windows
--------
Prepare
• Install hip and visual studio
• Install vulkan sdk from vulkan.lunarg.com
• Download GLFW binaries from glfw.org
• Convert sinwave.farg and vert files to spv
• c:\VulkanSDK\1.3.243.0\bin\glslangValidator.exe sinewave.vert -V -o vert.spv
• c:\VulkanSDK\1.3.243.0\bin\glslangValidator.exe sinewave.frag -V -o frag.spv
Build without CMake
• set HCC_AMDGPU_TARGET=gfx906:sramecc-:xnack- (for your graphic card, you can get the name from hipinfo )
• hipcc -v *.cpp *.hip -o hip_vulkan_image.exe -Ic:\VulkanSDK\1.3.243.0\include -L c:\VulkanSDK\1.3.243.0\lib -Ic:\glfw-3.3.8.bin.WIN64\include -L c:\glfw-3.3.8.bin.WIN64\lib-vc2019 -Ic:\hip\include\hip -lglfw3dll -lvulkan-1 -ladvapi32 -std=c++14
• run hip_vulkan_image.exe, you should see a 3D sinewave simulation
Build with CMake:
• mkdir build; cd build
• cmake.exe -GNinja -DCMAKE_CXX_COMPILER_ID=ROCMClang -DCMAKE_C_COMPILER_ID=ROCMClang -DCMAKE_PREFIX_PATH=d:\driver2\drivers\drivers\compute\hip_sdk
Linux
------
• Ideally, vulkan should be picked up by cmake from the location where it is installed. eg: /usr/lib
• If a specific version of vulkan is needed, install vulkan sdk from vulkan.lunarg.com following the steps
• To run this sample, connect to the machine where display is enabled using NoMachine app
Build with CMake:
• mkdir build; cd build
• cmake ..
• make
• run hipVulkan executable
@@ -1,502 +0,0 @@
/*
* Copyright (c) 2015-2016 The Khronos Group Inc.
* Copyright (c) 2015-2016 Valve Corporation
* Copyright (c) 2015-2016 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Relicensed from the WTFPL (http://www.wtfpl.net/faq/).
*/
#ifndef LINMATH_H
#define LINMATH_H
#define _USE_MATH_DEFINES
#include <math.h>
// Converts degrees to radians.
#define degreesToRadians(angleDegrees) (angleDegrees * M_PI / 180.0)
// Converts radians to degrees.
#define radiansToDegrees(angleRadians) (angleRadians * 180.0 / M_PI)
typedef float vec3[3];
static inline void vec3_add(vec3 r, vec3 const a, vec3 const b) {
int i;
for (i = 0; i < 3; ++i) r[i] = a[i] + b[i];
}
static inline void vec3_sub(vec3 r, vec3 const a, vec3 const b) {
int i;
for (i = 0; i < 3; ++i) r[i] = a[i] - b[i];
}
static inline void vec3_scale(vec3 r, vec3 const v, float const s) {
int i;
for (i = 0; i < 3; ++i) r[i] = v[i] * s;
}
static inline float vec3_mul_inner(vec3 const a, vec3 const b) {
float p = 0.f;
int i;
for (i = 0; i < 3; ++i) p += b[i] * a[i];
return p;
}
static inline void vec3_mul_cross(vec3 r, vec3 const a, vec3 const b) {
r[0] = a[1] * b[2] - a[2] * b[1];
r[1] = a[2] * b[0] - a[0] * b[2];
r[2] = a[0] * b[1] - a[1] * b[0];
}
static inline float vec3_len(vec3 const v) { return sqrtf(vec3_mul_inner(v, v)); }
static inline void vec3_norm(vec3 r, vec3 const v) {
float k = 1.f / vec3_len(v);
vec3_scale(r, v, k);
}
static inline void vec3_reflect(vec3 r, vec3 const v, vec3 const n) {
float p = 2.f * vec3_mul_inner(v, n);
int i;
for (i = 0; i < 3; ++i) r[i] = v[i] - p * n[i];
}
typedef float vec4[4];
static inline void vec4_add(vec4 r, vec4 const a, vec4 const b) {
int i;
for (i = 0; i < 4; ++i) r[i] = a[i] + b[i];
}
static inline void vec4_sub(vec4 r, vec4 const a, vec4 const b) {
int i;
for (i = 0; i < 4; ++i) r[i] = a[i] - b[i];
}
static inline void vec4_scale(vec4 r, vec4 v, float s) {
int i;
for (i = 0; i < 4; ++i) r[i] = v[i] * s;
}
static inline float vec4_mul_inner(vec4 a, vec4 b) {
float p = 0.f;
int i;
for (i = 0; i < 4; ++i) p += b[i] * a[i];
return p;
}
static inline void vec4_mul_cross(vec4 r, vec4 a, vec4 b) {
r[0] = a[1] * b[2] - a[2] * b[1];
r[1] = a[2] * b[0] - a[0] * b[2];
r[2] = a[0] * b[1] - a[1] * b[0];
r[3] = 1.f;
}
static inline float vec4_len(vec4 v) { return sqrtf(vec4_mul_inner(v, v)); }
static inline void vec4_norm(vec4 r, vec4 v) {
float k = 1.f / vec4_len(v);
vec4_scale(r, v, k);
}
static inline void vec4_reflect(vec4 r, vec4 v, vec4 n) {
float p = 2.f * vec4_mul_inner(v, n);
int i;
for (i = 0; i < 4; ++i) r[i] = v[i] - p * n[i];
}
typedef vec4 mat4x4[4];
static inline void mat4x4_identity(mat4x4 M) {
int i, j;
for (i = 0; i < 4; ++i)
for (j = 0; j < 4; ++j) M[i][j] = i == j ? 1.f : 0.f;
}
static inline void mat4x4_dup(mat4x4 M, mat4x4 N) {
int i, j;
for (i = 0; i < 4; ++i)
for (j = 0; j < 4; ++j) M[i][j] = N[i][j];
}
static inline void mat4x4_row(vec4 r, mat4x4 M, int i) {
int k;
for (k = 0; k < 4; ++k) r[k] = M[k][i];
}
static inline void mat4x4_col(vec4 r, mat4x4 M, int i) {
int k;
for (k = 0; k < 4; ++k) r[k] = M[i][k];
}
static inline void mat4x4_transpose(mat4x4 M, mat4x4 N) {
int i, j;
for (j = 0; j < 4; ++j)
for (i = 0; i < 4; ++i) M[i][j] = N[j][i];
}
static inline void mat4x4_add(mat4x4 M, mat4x4 a, mat4x4 b) {
int i;
for (i = 0; i < 4; ++i) vec4_add(M[i], a[i], b[i]);
}
static inline void mat4x4_sub(mat4x4 M, mat4x4 a, mat4x4 b) {
int i;
for (i = 0; i < 4; ++i) vec4_sub(M[i], a[i], b[i]);
}
static inline void mat4x4_scale(mat4x4 M, mat4x4 a, float k) {
int i;
for (i = 0; i < 4; ++i) vec4_scale(M[i], a[i], k);
}
static inline void mat4x4_scale_aniso(mat4x4 M, mat4x4 a, float x, float y, float z) {
int i;
vec4_scale(M[0], a[0], x);
vec4_scale(M[1], a[1], y);
vec4_scale(M[2], a[2], z);
for (i = 0; i < 4; ++i) {
M[3][i] = a[3][i];
}
}
static inline void mat4x4_mul(mat4x4 M, mat4x4 a, mat4x4 b) {
int k, r, c;
for (c = 0; c < 4; ++c)
for (r = 0; r < 4; ++r) {
M[c][r] = 0.f;
for (k = 0; k < 4; ++k) M[c][r] += a[k][r] * b[c][k];
}
}
static inline void mat4x4_mul_vec4(vec4 r, mat4x4 M, vec4 v) {
int i, j;
for (j = 0; j < 4; ++j) {
r[j] = 0.f;
for (i = 0; i < 4; ++i) r[j] += M[i][j] * v[i];
}
}
static inline void mat4x4_translate(mat4x4 T, float x, float y, float z) {
mat4x4_identity(T);
T[3][0] = x;
T[3][1] = y;
T[3][2] = z;
}
static inline void mat4x4_translate_in_place(mat4x4 M, float x, float y, float z) {
vec4 t = {x, y, z, 0};
vec4 r;
int i;
for (i = 0; i < 4; ++i) {
mat4x4_row(r, M, i);
M[3][i] += vec4_mul_inner(r, t);
}
}
static inline void mat4x4_from_vec3_mul_outer(mat4x4 M, vec3 a, vec3 b) {
int i, j;
for (i = 0; i < 4; ++i)
for (j = 0; j < 4; ++j) M[i][j] = i < 3 && j < 3 ? a[i] * b[j] : 0.f;
}
static inline void mat4x4_rotate(mat4x4 R, mat4x4 M, float x, float y, float z, float angle) {
float s = sinf(angle);
float c = cosf(angle);
vec3 u = {x, y, z};
if (vec3_len(u) > 1e-4) {
vec3_norm(u, u);
mat4x4 T;
mat4x4_from_vec3_mul_outer(T, u, u);
mat4x4 S = {{0, u[2], -u[1], 0}, {-u[2], 0, u[0], 0}, {u[1], -u[0], 0, 0}, {0, 0, 0, 0}};
mat4x4_scale(S, S, s);
mat4x4 C;
mat4x4_identity(C);
mat4x4_sub(C, C, T);
mat4x4_scale(C, C, c);
mat4x4_add(T, T, C);
mat4x4_add(T, T, S);
T[3][3] = 1.;
mat4x4_mul(R, M, T);
} else {
mat4x4_dup(R, M);
}
}
static inline void mat4x4_rotate_X(mat4x4 Q, mat4x4 M, float angle) {
float s = sinf(angle);
float c = cosf(angle);
mat4x4 R = {{1.f, 0.f, 0.f, 0.f}, {0.f, c, s, 0.f}, {0.f, -s, c, 0.f}, {0.f, 0.f, 0.f, 1.f}};
mat4x4_mul(Q, M, R);
}
static inline void mat4x4_rotate_Y(mat4x4 Q, mat4x4 M, float angle) {
float s = sinf(angle);
float c = cosf(angle);
mat4x4 R = {{c, 0.f, s, 0.f}, {0.f, 1.f, 0.f, 0.f}, {-s, 0.f, c, 0.f}, {0.f, 0.f, 0.f, 1.f}};
mat4x4_mul(Q, M, R);
}
static inline void mat4x4_rotate_Z(mat4x4 Q, mat4x4 M, float angle) {
float s = sinf(angle);
float c = cosf(angle);
mat4x4 R = {{c, s, 0.f, 0.f}, {-s, c, 0.f, 0.f}, {0.f, 0.f, 1.f, 0.f}, {0.f, 0.f, 0.f, 1.f}};
mat4x4_mul(Q, M, R);
}
static inline void mat4x4_invert(mat4x4 T, mat4x4 M) {
float s[6];
float c[6];
s[0] = M[0][0] * M[1][1] - M[1][0] * M[0][1];
s[1] = M[0][0] * M[1][2] - M[1][0] * M[0][2];
s[2] = M[0][0] * M[1][3] - M[1][0] * M[0][3];
s[3] = M[0][1] * M[1][2] - M[1][1] * M[0][2];
s[4] = M[0][1] * M[1][3] - M[1][1] * M[0][3];
s[5] = M[0][2] * M[1][3] - M[1][2] * M[0][3];
c[0] = M[2][0] * M[3][1] - M[3][0] * M[2][1];
c[1] = M[2][0] * M[3][2] - M[3][0] * M[2][2];
c[2] = M[2][0] * M[3][3] - M[3][0] * M[2][3];
c[3] = M[2][1] * M[3][2] - M[3][1] * M[2][2];
c[4] = M[2][1] * M[3][3] - M[3][1] * M[2][3];
c[5] = M[2][2] * M[3][3] - M[3][2] * M[2][3];
/* Assumes it is invertible */
float idet = 1.0f / (s[0] * c[5] - s[1] * c[4] + s[2] * c[3] + s[3] * c[2] - s[4] * c[1] + s[5] * c[0]);
T[0][0] = (M[1][1] * c[5] - M[1][2] * c[4] + M[1][3] * c[3]) * idet;
T[0][1] = (-M[0][1] * c[5] + M[0][2] * c[4] - M[0][3] * c[3]) * idet;
T[0][2] = (M[3][1] * s[5] - M[3][2] * s[4] + M[3][3] * s[3]) * idet;
T[0][3] = (-M[2][1] * s[5] + M[2][2] * s[4] - M[2][3] * s[3]) * idet;
T[1][0] = (-M[1][0] * c[5] + M[1][2] * c[2] - M[1][3] * c[1]) * idet;
T[1][1] = (M[0][0] * c[5] - M[0][2] * c[2] + M[0][3] * c[1]) * idet;
T[1][2] = (-M[3][0] * s[5] + M[3][2] * s[2] - M[3][3] * s[1]) * idet;
T[1][3] = (M[2][0] * s[5] - M[2][2] * s[2] + M[2][3] * s[1]) * idet;
T[2][0] = (M[1][0] * c[4] - M[1][1] * c[2] + M[1][3] * c[0]) * idet;
T[2][1] = (-M[0][0] * c[4] + M[0][1] * c[2] - M[0][3] * c[0]) * idet;
T[2][2] = (M[3][0] * s[4] - M[3][1] * s[2] + M[3][3] * s[0]) * idet;
T[2][3] = (-M[2][0] * s[4] + M[2][1] * s[2] - M[2][3] * s[0]) * idet;
T[3][0] = (-M[1][0] * c[3] + M[1][1] * c[1] - M[1][2] * c[0]) * idet;
T[3][1] = (M[0][0] * c[3] - M[0][1] * c[1] + M[0][2] * c[0]) * idet;
T[3][2] = (-M[3][0] * s[3] + M[3][1] * s[1] - M[3][2] * s[0]) * idet;
T[3][3] = (M[2][0] * s[3] - M[2][1] * s[1] + M[2][2] * s[0]) * idet;
}
static inline void mat4x4_orthonormalize(mat4x4 R, mat4x4 M) {
mat4x4_dup(R, M);
float s = 1.;
vec3 h;
vec3_norm(R[2], R[2]);
s = vec3_mul_inner(R[1], R[2]);
vec3_scale(h, R[2], s);
vec3_sub(R[1], R[1], h);
vec3_norm(R[2], R[2]);
s = vec3_mul_inner(R[1], R[2]);
vec3_scale(h, R[2], s);
vec3_sub(R[1], R[1], h);
vec3_norm(R[1], R[1]);
s = vec3_mul_inner(R[0], R[1]);
vec3_scale(h, R[1], s);
vec3_sub(R[0], R[0], h);
vec3_norm(R[0], R[0]);
}
static inline void mat4x4_frustum(mat4x4 M, float l, float r, float b, float t, float n, float f) {
M[0][0] = 2.f * n / (r - l);
M[0][1] = M[0][2] = M[0][3] = 0.f;
M[1][1] = 2.f * n / (t - b);
M[1][0] = M[1][2] = M[1][3] = 0.f;
M[2][0] = (r + l) / (r - l);
M[2][1] = (t + b) / (t - b);
M[2][2] = -(f + n) / (f - n);
M[2][3] = -1.f;
M[3][2] = -2.f * (f * n) / (f - n);
M[3][0] = M[3][1] = M[3][3] = 0.f;
}
static inline void mat4x4_ortho(mat4x4 M, float l, float r, float b, float t, float n, float f) {
M[0][0] = 2.f / (r - l);
M[0][1] = M[0][2] = M[0][3] = 0.f;
M[1][1] = 2.f / (t - b);
M[1][0] = M[1][2] = M[1][3] = 0.f;
M[2][2] = -2.f / (f - n);
M[2][0] = M[2][1] = M[2][3] = 0.f;
M[3][0] = -(r + l) / (r - l);
M[3][1] = -(t + b) / (t - b);
M[3][2] = -(f + n) / (f - n);
M[3][3] = 1.f;
}
static inline void mat4x4_perspective(mat4x4 m, float y_fov, float aspect, float n, float f) {
/* NOTE: Degrees are an unhandy unit to work with.
* linmath.h uses radians for everything! */
float const a = (float)(1.f / tan(y_fov / 2.f));
m[0][0] = a / aspect;
m[0][1] = 0.f;
m[0][2] = 0.f;
m[0][3] = 0.f;
m[1][0] = 0.f;
m[1][1] = a;
m[1][2] = 0.f;
m[1][3] = 0.f;
m[2][0] = 0.f;
m[2][1] = 0.f;
m[2][2] = -((f + n) / (f - n));
m[2][3] = -1.f;
m[3][0] = 0.f;
m[3][1] = 0.f;
m[3][2] = -((2.f * f * n) / (f - n));
m[3][3] = 0.f;
}
static inline void mat4x4_look_at(mat4x4 m, vec3 eye, vec3 center, vec3 up) {
/* Adapted from Android's OpenGL Matrix.java. */
/* See the OpenGL GLUT documentation for gluLookAt for a description */
/* of the algorithm. We implement it in a straightforward way: */
/* TODO: The negation of of can be spared by swapping the order of
* operands in the following cross products in the right way. */
vec3 f;
vec3_sub(f, center, eye);
vec3_norm(f, f);
vec3 s;
vec3_mul_cross(s, f, up);
vec3_norm(s, s);
vec3 t;
vec3_mul_cross(t, s, f);
m[0][0] = s[0];
m[0][1] = t[0];
m[0][2] = -f[0];
m[0][3] = 0.f;
m[1][0] = s[1];
m[1][1] = t[1];
m[1][2] = -f[1];
m[1][3] = 0.f;
m[2][0] = s[2];
m[2][1] = t[2];
m[2][2] = -f[2];
m[2][3] = 0.f;
m[3][0] = 0.f;
m[3][1] = 0.f;
m[3][2] = 0.f;
m[3][3] = 1.f;
mat4x4_translate_in_place(m, -eye[0], -eye[1], -eye[2]);
}
typedef float quat[4];
static inline void quat_identity(quat q) {
q[0] = q[1] = q[2] = 0.f;
q[3] = 1.f;
}
static inline void quat_add(quat r, quat a, quat b) {
int i;
for (i = 0; i < 4; ++i) r[i] = a[i] + b[i];
}
static inline void quat_sub(quat r, quat a, quat b) {
int i;
for (i = 0; i < 4; ++i) r[i] = a[i] - b[i];
}
static inline void quat_mul(quat r, quat p, quat q) {
vec3 w;
vec3_mul_cross(r, p, q);
vec3_scale(w, p, q[3]);
vec3_add(r, r, w);
vec3_scale(w, q, p[3]);
vec3_add(r, r, w);
r[3] = p[3] * q[3] - vec3_mul_inner(p, q);
}
static inline void quat_scale(quat r, quat v, float s) {
int i;
for (i = 0; i < 4; ++i) r[i] = v[i] * s;
}
static inline float quat_inner_product(quat a, quat b) {
float p = 0.f;
int i;
for (i = 0; i < 4; ++i) p += b[i] * a[i];
return p;
}
static inline void quat_conj(quat r, quat q) {
int i;
for (i = 0; i < 3; ++i) r[i] = -q[i];
r[3] = q[3];
}
#define quat_norm vec4_norm
static inline void quat_mul_vec3(vec3 r, quat q, vec3 v) {
quat v_ = {v[0], v[1], v[2], 0.f};
quat_conj(r, q);
quat_norm(r, r);
quat_mul(r, v_, r);
quat_mul(r, q, r);
}
static inline void mat4x4_from_quat(mat4x4 M, quat q) {
float a = q[3];
float b = q[0];
float c = q[1];
float d = q[2];
float a2 = a * a;
float b2 = b * b;
float c2 = c * c;
float d2 = d * d;
M[0][0] = a2 + b2 - c2 - d2;
M[0][1] = 2.f * (b * c + a * d);
M[0][2] = 2.f * (b * d - a * c);
M[0][3] = 0.f;
M[1][0] = 2 * (b * c - a * d);
M[1][1] = a2 - b2 + c2 - d2;
M[1][2] = 2.f * (c * d + a * b);
M[1][3] = 0.f;
M[2][0] = 2.f * (b * d + a * c);
M[2][1] = 2.f * (c * d - a * b);
M[2][2] = a2 - b2 - c2 + d2;
M[2][3] = 0.f;
M[3][0] = M[3][1] = M[3][2] = 0.f;
M[3][3] = 1.f;
}
static inline void mat4x4o_mul_quat(mat4x4 R, mat4x4 M, quat q) {
/* XXX: The way this is written only works for othogonal matrices. */
/* TODO: Take care of non-orthogonal case. */
quat_mul_vec3(R[0], q, M[0]);
quat_mul_vec3(R[1], q, M[1]);
quat_mul_vec3(R[2], q, M[2]);
R[3][0] = R[3][1] = R[3][2] = 0.f;
R[3][3] = 1.f;
}
static inline void quat_from_mat4x4(quat q, mat4x4 M) {
float r = 0.f;
int i;
int perm[] = {0, 1, 2, 0, 1};
int *p = perm;
for (i = 0; i < 3; i++) {
float m = M[i][i];
if (m < r) continue;
m = r;
p = &perm[i];
}
r = sqrtf(1.f + M[p[0]][p[0]] - M[p[1]][p[1]] - M[p[2]][p[2]]);
if (r < 1e-6) {
q[0] = 1.f;
q[1] = q[2] = q[3] = 0.f;
return;
}
q[0] = r / 2.f;
q[1] = (M[p[0]][p[1]] - M[p[1]][p[0]]) / (2.f * r);
q[2] = (M[p[2]][p[0]] - M[p[0]][p[2]]) / (2.f * r);
q[3] = (M[p[2]][p[1]] - M[p[1]][p[2]]) / (2.f * r);
}
#endif
@@ -1,466 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Modifications Copyright (C)2021 Advanced
* Micro Devices, Inc. All rights reserved.
*/
#include "VulkanBaseApp.h"
#include <iostream>
#include <iomanip>
#include <chrono>
#include <algorithm>
#include "linmath.h"
#include "hip/hip_runtime.h"
#include "SineWaveSimulation.h"
typedef float vec2[2];
std::string execution_path;
#ifndef NDEBUG
#define ENABLE_VALIDATION (false)
#else
#define ENABLE_VALIDATION (true)
#endif
#ifndef _WIN64
#define MAX_PATH 260
int GetModuleFileName(void* hndl, char* name, int size)
{
FILE* stream = fopen("/proc/self/cmdline", "r");
fgets(name, size, stream);
fclose(stream);
return strlen(name);
}
#endif
class VulkanCudaSineWave : public VulkanBaseApp
{
typedef struct UniformBufferObject_st {
mat4x4 modelViewProj;
} UniformBufferObject;
VkBuffer m_heightBuffer, m_xyBuffer, m_indexBuffer;
VkDeviceMemory m_heightMemory, m_xyMemory, m_indexMemory;
UniformBufferObject m_ubo;
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
SineWaveSimulation m_sim;
hipStream_t m_stream;
hipExternalSemaphore_t m_cudaWaitSemaphore, m_cudaSignalSemaphore;
hipExternalMemory_t m_cudaVertMem;
float *m_cudaHeightMap;
using chrono_tp = std::chrono::time_point<std::chrono::high_resolution_clock>;
chrono_tp m_lastTime;
size_t m_lastFrame;
public:
VulkanCudaSineWave(size_t width, size_t height) :
VulkanBaseApp("vulkanCudaSineWave", ENABLE_VALIDATION),
m_heightBuffer(VK_NULL_HANDLE),
m_xyBuffer(VK_NULL_HANDLE),
m_indexBuffer(VK_NULL_HANDLE),
m_heightMemory(VK_NULL_HANDLE),
m_xyMemory(VK_NULL_HANDLE),
m_indexMemory(VK_NULL_HANDLE),
m_ubo(),
m_sim(width, height),
m_stream(0),
m_vkWaitSemaphore(VK_NULL_HANDLE),
m_vkSignalSemaphore(VK_NULL_HANDLE),
m_cudaWaitSemaphore(),
m_cudaSignalSemaphore(),
m_cudaVertMem(),
m_cudaHeightMap(nullptr),
m_lastFrame(0) {
// Our index buffer can only index 32-bits of the vertex buffer
if ((width * height) > (1ULL << 32ULL)) {
throw std::runtime_error("Requested height and width is too large for this sample!");
}
// Add our compiled vulkan shader files
char buffer[MAX_PATH] = { 0 }; //assuming none unicode
GetModuleFileName(NULL, buffer, MAX_PATH);
std::string str3 = std::string(buffer);
std::string str1 = "vert.spv" ; //sdkFindFilePath("sinewave.vert", execution_path.c_str());
char* vertex_shader_path = strdup(str1.c_str());
std::string str2 = "frag.spv" ; //sdkFindFilePath("sinewave.frag", execution_path.c_str());
char* fragment_shader_path = strdup(str2.c_str());
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path));
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_path));
}
~VulkanCudaSineWave() {
// Make sure there's no pending work before we start tearing down
checkHIPErrors(hipStreamSynchronize(m_stream));
if (m_vkSignalSemaphore != VK_NULL_HANDLE) {
checkHIPErrors(hipDestroyExternalSemaphore(m_cudaSignalSemaphore));
vkDestroySemaphore(m_device, m_vkSignalSemaphore, nullptr);
}
if (m_vkWaitSemaphore != VK_NULL_HANDLE) {
checkHIPErrors(hipDestroyExternalSemaphore(m_cudaWaitSemaphore));
vkDestroySemaphore(m_device, m_vkWaitSemaphore, nullptr);
}
if (m_xyBuffer != VK_NULL_HANDLE) {
vkDestroyBuffer(m_device, m_xyBuffer, nullptr);
}
if (m_xyMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_xyMemory, nullptr);
}
if (m_heightBuffer != VK_NULL_HANDLE) {
vkDestroyBuffer(m_device, m_heightBuffer, nullptr);
}
if (m_heightMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_heightMemory, nullptr);
}
if (m_cudaHeightMap) {
checkHIPErrors(hipDestroyExternalMemory(m_cudaVertMem));
}
if (m_indexBuffer != VK_NULL_HANDLE) {
vkDestroyBuffer(m_device, m_indexBuffer, nullptr);
}
if (m_indexMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_indexMemory, nullptr);
}
}
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
VkBuffer vertexBuffers[] = { m_heightBuffer, m_xyBuffer };
VkDeviceSize offsets[] = { 0, 0 };
vkCmdBindVertexBuffers(commandBuffer, 0, sizeof(vertexBuffers) / sizeof(vertexBuffers[0]), vertexBuffers, offsets);
vkCmdBindIndexBuffer(commandBuffer, m_indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(commandBuffer, (uint32_t)((m_sim.getWidth() - 1) * (m_sim.getHeight() - 1) * 6), 1, 0, 0, 0);
}
void getVertexDescriptions(std::vector<VkVertexInputBindingDescription>& bindingDesc, std::vector<VkVertexInputAttributeDescription>& attribDesc) {
bindingDesc.resize(2);
attribDesc.resize(2);
bindingDesc[0].binding = 0;
bindingDesc[0].stride = sizeof(float);
bindingDesc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
bindingDesc[1].binding = 1;
bindingDesc[1].stride = sizeof(vec2);
bindingDesc[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
attribDesc[0].binding = 0;
attribDesc[0].location = 0;
attribDesc[0].format = VK_FORMAT_R32_SFLOAT;
attribDesc[0].offset = 0;
attribDesc[1].binding = 1;
attribDesc[1].location = 1;
attribDesc[1].format = VK_FORMAT_R32G32_SFLOAT;
attribDesc[1].offset = 0;
}
void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo& info) {
info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
info.primitiveRestartEnable = VK_FALSE;
}
void getWaitFrameSemaphores(std::vector<VkSemaphore>& wait, std::vector< VkPipelineStageFlags>& waitStages) const {
if (m_currentFrame != 0) {
// Have vulkan wait until cuda is done with the vertex buffer before rendering
// We don't do this on the first frame, as the wait semaphore hasn't been initialized yet
wait.push_back(m_vkWaitSemaphore);
// We want to wait until all the pipeline commands are complete before letting cuda work
waitStages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
}
void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const {
// Add this semaphore for vulkan to signal once the vertex buffer is ready for cuda to modify
signal.push_back(m_vkSignalSemaphore);
}
void initVulkanApp() {
int cuda_device = -1;
// Select cuda device where vulkan is running.
cuda_device = m_sim.initCuda(m_vkDeviceUUID, VK_UUID_SIZE);
if (cuda_device == -1)
{
printf("Error: No CUDA-Vulkan interop capable device found\n");
exit(EXIT_FAILURE);
}
m_sim.initCudaLaunchConfig(cuda_device);
// Create the cuda stream we'll be using
checkHIPErrors(hipStreamCreateWithFlags(&m_stream, hipStreamNonBlocking));
const size_t nVerts = m_sim.getWidth() * m_sim.getHeight();
const size_t nInds = (m_sim.getWidth() - 1) * (m_sim.getHeight() - 1) * 6;
// Create the height map cuda will write to
createExternalBuffer(nVerts * sizeof(float),
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
getDefaultMemHandleType(),
m_heightBuffer, m_heightMemory);
// Create the vertex buffer that will hold the xy coordinates for the grid
createBuffer(nVerts * sizeof(vec2),
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
m_xyBuffer, m_xyMemory);
// Create the index buffer that references from both buffers above
createBuffer(nInds * sizeof(uint32_t),
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
m_indexBuffer, m_indexMemory);
// Import the height map into cuda and retrieve a device pointer to use
importHipExternalMemory((void **)&m_cudaHeightMap, m_cudaVertMem, m_heightMemory, nVerts * sizeof(*m_cudaHeightMap), getDefaultMemHandleType());
// Set the height map to use in the simulation
m_sim.initSimulation(m_cudaHeightMap);
{
// Set up the initial values for the vertex buffers with Vulkan
void *stagingBase;
VkBuffer stagingBuffer;
VkDeviceMemory stagingMemory;
VkDeviceSize stagingSz = std::max(nVerts * sizeof(vec2), nInds * sizeof(uint32_t));
createBuffer(stagingSz, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingMemory);
vkMapMemory(m_device, stagingMemory, 0, stagingSz, 0, &stagingBase);
memset(stagingBase, 0, nVerts * sizeof(float));
copyBuffer(m_heightBuffer, stagingBuffer, nVerts * sizeof(float));
for (size_t y = 0; y < m_sim.getHeight(); y++) {
for (size_t x = 0; x < m_sim.getWidth(); x++) {
vec2 *stagedVert = (vec2 *)stagingBase;
stagedVert[y * m_sim.getWidth() + x][0] = (2.0f * x) / (m_sim.getWidth() - 1) - 1;
stagedVert[y * m_sim.getWidth() + x][1] = (2.0f * y) / (m_sim.getHeight() - 1) - 1;
}
}
copyBuffer(m_xyBuffer, stagingBuffer, nVerts * sizeof(vec2));
{
uint32_t *indices = (uint32_t *)stagingBase;
for (size_t y = 0; y < m_sim.getHeight() - 1; y++) {
for (size_t x = 0; x < m_sim.getWidth() - 1; x++) {
indices[0] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 0));
indices[1] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 0));
indices[2] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 1));
indices[3] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 0));
indices[4] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 1));
indices[5] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 1));
indices += 6;
}
}
}
copyBuffer(m_indexBuffer, stagingBuffer, nInds * sizeof(uint32_t));
vkUnmapMemory(m_device, stagingMemory);
vkDestroyBuffer(m_device, stagingBuffer, nullptr);
vkFreeMemory(m_device, stagingMemory, nullptr);
}
// Create the semaphore vulkan will signal when it's done with the vertex buffer
createExternalSemaphore(m_vkSignalSemaphore, getDefaultSemaphoreHandleType());
// Create the semaphore vulkan will wait for before using the vertex buffer
createExternalSemaphore(m_vkWaitSemaphore, getDefaultSemaphoreHandleType());
// Import the semaphore cuda will use -- vulkan's signal will be cuda's wait
importCudaExternalSemaphore(m_cudaWaitSemaphore, m_vkSignalSemaphore, getDefaultSemaphoreHandleType());
// Import the semaphore cuda will use -- cuda's signal will be vulkan's wait
importCudaExternalSemaphore(m_cudaSignalSemaphore, m_vkWaitSemaphore, getDefaultSemaphoreHandleType());
}
void importHipExternalMemory(void **cudaPtr, hipExternalMemory_t& cudaMem, VkDeviceMemory& vkMem, VkDeviceSize size, VkExternalMemoryHandleTypeFlagBits handleType) {
hipExternalMemoryHandleDesc externalMemoryHandleDesc = {};
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) {
externalMemoryHandleDesc.type = hipExternalMemoryHandleTypeOpaqueWin32;
}
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) {
externalMemoryHandleDesc.type = hipExternalMemoryHandleTypeOpaqueWin32Kmt;
}
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
externalMemoryHandleDesc.type = hipExternalMemoryHandleTypeOpaqueFd;
}
else {
throw std::runtime_error("Unknown handle type requested!");
}
externalMemoryHandleDesc.size = size;
#ifdef _WIN64
externalMemoryHandleDesc.handle.win32.handle = (HANDLE)getMemHandle(vkMem, handleType);
#else
externalMemoryHandleDesc.handle.fd = (int)(uintptr_t)getMemHandle(vkMem, handleType);
#endif
checkHIPErrors(hipImportExternalMemory(&cudaMem, &externalMemoryHandleDesc));
hipExternalMemoryBufferDesc externalMemBufferDesc = {};
externalMemBufferDesc.offset = 0;
externalMemBufferDesc.size = size;
externalMemBufferDesc.flags = 0;
checkHIPErrors(hipExternalMemoryGetMappedBuffer(cudaPtr, cudaMem, &externalMemBufferDesc));
}
void importCudaExternalSemaphore(hipExternalSemaphore_t& cudaSem, VkSemaphore& vkSem, VkExternalSemaphoreHandleTypeFlagBits handleType) {
hipExternalSemaphoreHandleDesc externalSemaphoreHandleDesc = {};
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) {
externalSemaphoreHandleDesc.type = hipExternalSemaphoreHandleTypeOpaqueWin32;
}
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) {
externalSemaphoreHandleDesc.type = hipExternalSemaphoreHandleTypeOpaqueWin32Kmt;
}
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
externalSemaphoreHandleDesc.type = hipExternalSemaphoreHandleTypeOpaqueFd;
}
else {
throw std::runtime_error("Unknown handle type requested!");
}
#ifdef _WIN64
externalSemaphoreHandleDesc.handle.win32.handle = (HANDLE)getSemaphoreHandle(vkSem, handleType);
#else
externalSemaphoreHandleDesc.handle.fd = (int)(uintptr_t)getSemaphoreHandle(vkSem, handleType);
#endif
externalSemaphoreHandleDesc.flags = 0;
checkHIPErrors(hipImportExternalSemaphore(&cudaSem, &externalSemaphoreHandleDesc));
}
VkDeviceSize getUniformSize() const {
return sizeof(UniformBufferObject);
}
void updateUniformBuffer(uint32_t imageIndex) {
{
mat4x4 view, proj;
vec3 eye = { 1.75f, 1.75f, 1.25f };
vec3 center = { 0.0f, 0.0f, -0.25f };
vec3 up = { 0.0f, 0.0f, 1.0f };
mat4x4_perspective(proj, (float)degreesToRadians(45.0f), m_swapChainExtent.width / (float)m_swapChainExtent.height, 0.1f, 10.0f);
proj[1][1] *= -1.0f; // Flip y axis
mat4x4_look_at(view, eye, center, up);
mat4x4_mul(m_ubo.modelViewProj, proj, view);
}
void *data;
vkMapMemory(m_device, m_uniformMemory[imageIndex], 0, getUniformSize(), 0, &data);
memcpy(data, &m_ubo, sizeof(m_ubo));
vkUnmapMemory(m_device, m_uniformMemory[imageIndex]);
}
std::vector<const char *> getRequiredExtensions() const {
std::vector<const char *> extensions;
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME);
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME);
return extensions;
}
std::vector<const char *> getRequiredDeviceExtensions() const {
std::vector<const char *> extensions;
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME);
#ifdef _WIN64
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME);
#else
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME);
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME);
#endif /* _WIN64 */
return extensions;
}
void drawFrame() {
static chrono_tp startTime = std::chrono::high_resolution_clock::now();
chrono_tp currentTime = std::chrono::high_resolution_clock::now();
float time = std::chrono::duration<float, std::chrono::seconds::period>(currentTime - startTime).count();
if (m_currentFrame == 0) {
m_lastTime = startTime;
}
float frame_time = std::chrono::duration<float, std::chrono::seconds::period>(currentTime - m_lastTime).count();
hipExternalSemaphoreWaitParams waitParams = {};
waitParams.flags = 0;
waitParams.params.fence.value = 1;
hipExternalSemaphoreSignalParams signalParams = {};
signalParams.flags = 0;
signalParams.params.fence.value = 0;
// Have vulkan draw the current frame...
VulkanBaseApp::drawFrame();
// Wait for vulkan to complete it's work
checkHIPErrors(hipWaitExternalSemaphoresAsync(&m_cudaWaitSemaphore, &waitParams, 1, m_stream));
// Now step the simulation
m_sim.stepSimulation(time, m_stream);
// Signal vulkan to continue with the updated buffers
checkHIPErrors(hipSignalExternalSemaphoresAsync(&m_cudaSignalSemaphore, &signalParams, 1, m_stream));
// Output a naive measurement of the frames per second every five seconds
if (frame_time > 5) {
std::cout << "Average FPS (over "
<< std::fixed << std::setprecision(2) << frame_time
<< " seconds): "
<< std::fixed << std::setprecision(2)
<< ((m_currentFrame - m_lastFrame) / frame_time)
<< std::endl;
m_lastFrame = m_currentFrame;
m_lastTime = currentTime;
}
}
};
int main(int argc, char **argv)
{
execution_path = argv[0];
VulkanCudaSineWave app((1ULL << 8ULL), (1ULL << 8ULL));
app.init();
app.mainLoop();
return 0;
}
@@ -1,38 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#version 450
#extension GL_ARB_separate_shader_objects : enable
layout(location = 0) in vec3 fragColor;
layout(location = 0) out vec4 outColor;
void main() {
outColor = vec4(fragColor, 1.0);
}
@@ -1,43 +0,0 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#version 450
#extension GL_ARB_separate_shader_objects : enable
layout(binding = 0) uniform UniformBufferObject {
mat4 modelViewProj;
} ubo;
layout(location = 0) in float height;
layout(location = 1) in vec2 xyPos;
layout(location = 0) out vec3 fragColor;
void main() {
gl_Position = ubo.modelViewProj * vec4(xyPos.xy, height, 1.0f);
fragColor = vec3(0.0f, (height + 0.5f), 0.0f);
}
@@ -38,7 +38,6 @@ add_subdirectory(16_assembly_to_executable)
add_subdirectory(17_llvm_ir_to_executable)
add_subdirectory(18_cmake_hip_device)
add_subdirectory(19_cmake_lang)
#add_subdirectory(20_hip_vulkan)
add_subdirectory(21_cmake_hip_cxx_clang)
add_subdirectory(22_cmake_hip_lang)
add_subdirectory(23_cmake_hiprtc)