From 435cce8c1f5029a0001f31836288cdf8057a18e1 Mon Sep 17 00:00:00 2001 From: Sergey Kosarevsky Date: Sat, 14 Sep 2024 18:52:29 -0700 Subject: [PATCH] Added ray tracing shadows/AO example `samples/007_RayTracingAO.cpp` --- samples/007_RayTracingAO.cpp | 1271 ++++++++++++++++++++++++++++++++++ samples/CMakeLists.txt | 1 + 2 files changed, 1272 insertions(+) create mode 100644 samples/007_RayTracingAO.cpp diff --git a/samples/007_RayTracingAO.cpp b/samples/007_RayTracingAO.cpp new file mode 100644 index 0000000000..c70431a67f --- /dev/null +++ b/samples/007_RayTracingAO.cpp @@ -0,0 +1,1271 @@ +/* + * LightweightVK + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#if !defined(_USE_MATH_DEFINES) +#define _USE_MATH_DEFINES +#endif // _USE_MATH_DEFINES +#include +#include +#include +#include +#include + +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +// we are going to use raw Vulkan here +#include + +constexpr uint32_t kMeshCacheVersion = 0xC0DE000A; +constexpr int kNumSamplesMSAA = 4; +#if defined(NDEBUG) +constexpr bool kEnableValidationLayers = false; +#else +constexpr bool kEnableValidationLayers = true; +#endif // NDEBUG + +std::string folderThirdParty; +std::string folderContentRoot; + +std::unique_ptr imgui_; + +const char* kCodeFullscreenVS = R"( +layout (location=0) out vec2 uv; +void main() { + // generate a triangle covering the entire screen + uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(uv * vec2(2, -2) + vec2(-1, 1), 0.0, 1.0); +} +)"; + +const char* kCodeFullscreenFS = R"( +layout (location=0) in vec2 uv; +layout (location=0) out vec4 out_FragColor; + +layout(push_constant) uniform constants { + uint tex; +} pc; + +void main() { + out_FragColor = textureBindless2D(pc.tex, 0, uv); +} +)"; + +const char* kCodeZPrepassVS = R"( +layout 
(location=0) in vec3 pos; +layout (location=3) in uint mtlIndex; + +struct Material { + vec4 ambient; + vec4 diffuse; +}; + +layout(std430, buffer_reference) readonly buffer PerFrame { + mat4 proj; + mat4 view; +}; + +layout(std430, buffer_reference) readonly buffer PerObject { + mat4 model; +}; + +layout(std430, buffer_reference) readonly buffer Materials { + Material mtl[]; +}; + +layout(push_constant) uniform constants { + PerFrame perFrame; + PerObject perObject; + Materials materials; +} pc; + +// output +layout (location=0) flat out Material mtl; + +void main() { + mat4 proj = pc.perFrame.proj; + mat4 view = pc.perFrame.view; + mat4 model = pc.perObject.model; + mtl = pc.materials.mtl[mtlIndex]; + gl_Position = proj * view * model * vec4(pos, 1.0); +} +)"; + +const char* kCodeZPrepassFS = R"( +#version 460 + +struct Material { + vec4 ambient; + vec4 diffuse; +}; + +layout (location=0) flat in Material mtl; + +layout(push_constant) uniform constants { + uvec2 perFrame; +} pc; + +void main() { + vec4 Ka = mtl.ambient; + vec4 Kd = mtl.diffuse; + if (Kd.a < 0.5) + discard; +}; +)"; + +const char* kCodeVS = R"( +layout (location=0) in vec3 pos; +layout (location=1) in vec2 uv; +layout (location=2) in uint normal; // Octahedral 16-bit https://www.shadertoy.com/view/llfcRl +layout (location=3) in uint mtlIndex; + +struct Material { + vec4 ambient; + vec4 diffuse; +}; + +layout(std430, buffer_reference) readonly buffer PerFrame { + mat4 proj; + mat4 view; +}; + +layout(std430, buffer_reference) readonly buffer PerObject { + mat4 model; +}; + +layout(std430, buffer_reference) readonly buffer Materials { + Material mtl[]; +}; + +layout(push_constant) uniform constants { + vec4 lightDir; + PerFrame perFrame; + PerObject perObject; + Materials materials; + uint tlas; +} pc; + +// output +struct PerVertex { + vec3 worldPos; + vec3 normal; + vec2 uv; +}; +layout (location=0) out PerVertex vtx; +layout (location=5) flat out Material mtl; +// + +// 
https://www.shadertoy.com/view/llfcRl +vec2 unpackSnorm2x8(uint d) { + return vec2(uvec2(d, d >> 8) & 255u) / 127.5 - 1.0; +} +vec3 unpackOctahedral16(uint data) { + vec2 v = unpackSnorm2x8(data); + // https://x.com/Stubbesaurus/status/937994790553227264 + vec3 n = vec3(v, 1.0 - abs(v.x) - abs(v.y)); + float t = max(-n.z, 0.0); + n.x += (n.x > 0.0) ? -t : t; + n.y += (n.y > 0.0) ? -t : t; + return normalize(n); +} +// + +void main() { + mat4 proj = pc.perFrame.proj; + mat4 view = pc.perFrame.view; + mat4 model = pc.perObject.model; + mtl = pc.materials.mtl[mtlIndex]; + gl_Position = proj * view * model * vec4(pos, 1.0); + + // Compute the normal in world-space + mat3 norm_matrix = transpose(inverse(mat3(model))); + vtx.worldPos = (model * vec4(pos, 1.0)).xyz; + vtx.normal = normalize(norm_matrix * unpackOctahedral16(normal)); + vtx.uv = uv; +} +)"; + +const char* kCodeFS = R"( +#version 460 +#extension GL_EXT_buffer_reference_uvec2 : require +#extension GL_EXT_debug_printf : enable +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_ray_query : require + +layout(set = 0, binding = 0) uniform texture2D kTextures2D[]; +layout(set = 0, binding = 4) uniform accelerationStructureEXT kTLAS[]; + +layout(std430, buffer_reference) readonly buffer PerFrame { + mat4 proj; + mat4 view; + mat4 light; +}; + +struct Material { + vec4 ambient; + vec4 diffuse; +}; + +struct PerVertex { + vec3 worldPos; + vec3 normal; + vec2 uv; +}; + +layout(push_constant) uniform constants { + vec4 lightDir; + PerFrame perFrame; + uvec2 dummy0; + uvec2 dummy1; + uint tlas; + bool enableShadows; + bool enableAO; + bool aoDistanceBased; + int aoSamples; + float aoRadius; + float aoPower; +} pc; + +layout (location=0) in PerVertex vtx; +layout (location=5) flat in Material mtl; + +layout (location=0) out vec4 
out_FragColor;

// Builds two unit vectors `x` and `y` that, together with the unit normal `n`,
// are used as the tangent frame for hemisphere sampling in main().
// A different reference axis is used when `n` is almost parallel to Z so the
// vector being normalized does not degenerate.
void computeTBN(in vec3 n, out vec3 x, out vec3 y) {
  float yz = -n.y * n.z;
  y = normalize(((abs(n.z) > 0.9999) ? vec3(-n.x * n.y, 1.0 - n.y * n.y, yz) : vec3(-n.x * n.z, yz, 1.0 - n.z * n.z)));
  x = cross(y, n);
}

// Traces a single AO ray of length `pc.aoRadius` from `origin` along `dir`.
// Returns the occlusion contribution of that ray in [0, 1]:
//   0.0 - nothing was hit within the radius
//   1.0 - something was hit and distance weighting is disabled
//   1.0 - t / aoRadius - distance weighting is enabled; closer hits occlude more
float traceAO(rayQueryEXT rq, vec3 origin, vec3 dir) {
  // Distance weighting needs the distance to the *closest* hit, so the ray may
  // only stop at the first hit found when the distance is not used.
  // The original code had this polarity inverted: `aoDistanceBased` selected the
  // constant (binary) occlusion path and the distance-weighted path ran when the
  // flag was off, i.e. the "Distance based AO" checkbox did the opposite of its label.
  uint flags = pc.aoDistanceBased ? gl_RayFlagsNoneEXT : gl_RayFlagsTerminateOnFirstHitEXT;

  rayQueryInitializeEXT(rq, kTLAS[pc.tlas], flags, 0xFF, origin, 0.0f, dir, pc.aoRadius);

  while (rayQueryProceedEXT(rq)) {}

  if (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionNoneEXT) {
    return 0.0;
  }

  if (!pc.aoDistanceBased) {
    return 1.0;
  }

  // linear falloff: a hit at distance 0 occludes fully, a hit at `aoRadius` not at all
  return 1.0 - (rayQueryGetIntersectionTEXT(rq, true) / pc.aoRadius);
}

// generate a random unsigned int in [0, 2^24) given the previous RNG state using the Numerical Recipes LCG
uint lcg(inout uint prev) {
  uint LCG_A = 1664525u;
  uint LCG_C = 1013904223u;
  prev = (LCG_A * prev + LCG_C);
  // keep the low 24 bits
  return prev & 0x00FFFFFF;
}

// Generate a random float in [0, 1) given the previous RNG state
float rnd(inout uint seed) {
  // 0x01000000 == 2^24, one past the largest value lcg() can return
  return (float(lcg(seed)) / float(0x01000000));
}

// Generate a random unsigned int from two unsigned int values, using 16 pairs of rounds of the Tiny Encryption Algorithm. 
See Zafar, Olano, and Curtis, +// "GPU Random Numbers via the Tiny Encryption Algorithm" +uint tea(uint val0, uint val1) { + uint v0 = val0; + uint v1 = val1; + uint s0 = 0; + + for(uint n = 0; n < 16; n++) { + s0 += 0x9e3779b9; + v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + s0) ^ ((v1 >> 5) + 0xc8013ea4); + v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + s0) ^ ((v0 >> 5) + 0x7e95761e); + } + + return v0; +} + +void main() { + vec4 Ka = mtl.ambient; + vec4 Kd = mtl.diffuse; + if (Kd.a < 0.5) + discard; + vec3 n = normalize(vtx.normal); + + float occlusion = 1.0; + + // ambient occlusion + if (pc.enableAO) + { + vec3 origin = vtx.worldPos + n * 0.001; // avoid self-occlusion + + vec3 tangent, bitangent; + computeTBN(n, tangent, bitangent); + + uint seed = tea(uint(gl_FragCoord.y * 4003.0 + gl_FragCoord.x), 0); // prime + + float occl = 0.0; + + for(int i = 0; i < pc.aoSamples; i++) { + float r1 = rnd(seed); + float r2 = rnd(seed); + float sq = sqrt(1.0 - r2); + float phi = 2 * 3.141592653589 * r1; + vec3 direction = vec3(cos(phi) * sq, sin(phi) * sq, sqrt(r2)); + direction = direction.x * tangent + direction.y * bitangent + direction.z * n; + rayQueryEXT rayQuery; + occl += traceAO(rayQuery, origin, direction); + } + occlusion = 1 - (occl / pc.aoSamples); + occlusion = pow(clamp(occlusion, 0, 1), pc.aoPower); + } + // directional shadow + if (pc.enableShadows) { + rayQueryEXT rq; + rayQueryInitializeEXT(rq, kTLAS[pc.tlas], gl_RayFlagsTerminateOnFirstHitEXT, 0xff, vtx.worldPos, 0.01, pc.lightDir.xyz, +1000.0); + while (rayQueryProceedEXT(rq)) {} + if (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionNoneEXT) occlusion *= 0.5; + } + + out_FragColor = Ka + Kd * occlusion; +}; +)"; + +using glm::mat4; +using glm::vec2; +using glm::vec3; +using glm::vec4; + +int width_ = 0; +int height_ = 0; +FramesPerSecondCounter fps_; + +constexpr uint32_t kNumBufferedFrames = 3; + +std::unique_ptr ctx_; +lvk::Framebuffer fbMain_; // swapchain +lvk::Framebuffer 
fbOffscreen_; +lvk::Holder fbOffscreenColor_; +lvk::Holder fbOffscreenDepth_; +lvk::Holder fbOffscreenResolve_; +lvk::Holder smMeshVert_; +lvk::Holder smMeshFrag_; +lvk::Holder smMeshVertZPrepass_; +lvk::Holder smMeshFragZPrepass_; +lvk::Holder smFullscreenVert_; +lvk::Holder smFullscreenFrag_; +lvk::Holder renderPipelineState_Mesh_; +lvk::Holder renderPipelineState_MeshZPrepass_; +lvk::Holder renderPipelineState_Fullscreen_; +lvk::Holder vb0_, ib0_; // buffers for vertices and indices +lvk::Holder sbMaterials_; // storage buffer for materials +lvk::Holder sbInstances_; // storage buffer for TLAS instances +std::vector> ubPerFrame_, ubPerObject_; +lvk::RenderPass renderPassOffscreen_; +lvk::RenderPass renderPassZPrepass_; +lvk::RenderPass renderPassMain_; +lvk::Holder BLAS; +lvk::Holder TLAS; + +// scene navigation +CameraPositioner_FirstPerson positioner_(vec3(-100, 40, -47), vec3(0, 35, 0), vec3(0, 1, 0)); +Camera camera_(positioner_); +glm::vec2 mousePos_ = glm::vec2(0.0f); +bool mousePressed_ = false; + +bool enableShadows_ = true; +bool enableAO_ = true; + +int aoSamples_ = 8; +bool aoDistanceBased_ = false; +float aoRadius_ = 8.0f; +float aoPower_ = 1.0f; + +vec3 lightDir_ = normalize(vec3(0.032f, 0.835f, 0.549f)); + +struct VertexData { + vec3 position; + uint32_t uv; // hvec2 + uint16_t normal; // Octahedral 16-bit https://www.shadertoy.com/view/llfcRl + uint16_t mtlIndex; +}; + +static_assert(sizeof(VertexData) == 5 * sizeof(uint32_t)); + +vec2 msign(vec2 v) { + return vec2(v.x >= 0.0 ? 1.0f : -1.0f, v.y >= 0.0 ? 1.0f : -1.0f); +} + +// https://www.shadertoy.com/view/llfcRl +uint16_t packSnorm2x8(vec2 v) { + glm::uvec2 d = glm::uvec2(round(127.5f + v * 127.5f)); + return d.x | (d.y << 8u); +} + +// https://www.shadertoy.com/view/llfcRl +uint16_t packOctahedral16(vec3 n) { + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + return ::packSnorm2x8((n.z >= 0.0) ? 
vec2(n.x, n.y) : (vec2(1.0) - abs(vec2(n.y, n.x))) * msign(vec2(n))); +} + +std::vector vertexData_; +std::vector indexData_; + +struct UniformsPerFrame { + mat4 proj; + mat4 view; +} perFrame_; + +struct UniformsPerObject { + mat4 model; +}; +#define MAX_MATERIAL_NAME 128 + +struct CachedMaterial { + char name[MAX_MATERIAL_NAME] = {}; + vec3 ambient = vec3(0.0f); + vec3 diffuse = vec3(0.0f); +}; + +// this goes into our GLSL shaders +struct GPUMaterial { + vec4 ambient = vec4(0.0f); + vec4 diffuse = vec4(0.0f); +}; + +static_assert(sizeof(GPUMaterial) % 16 == 0); + +std::vector cachedMaterials_; +std::vector materials_; + +bool initModel(); +void createPipelines(); +void createOffscreenFramebuffer(); + +bool init() { + for (uint32_t i = 0; i != kNumBufferedFrames; i++) { + ubPerFrame_.push_back(ctx_->createBuffer({.usage = lvk::BufferUsageBits_Uniform, + .storage = lvk::StorageType_HostVisible, + .size = sizeof(UniformsPerFrame), + .debugName = "Buffer: uniforms (per frame)"}, + nullptr)); + ubPerObject_.push_back(ctx_->createBuffer({.usage = lvk::BufferUsageBits_Uniform, + .storage = lvk::StorageType_HostVisible, + .size = sizeof(UniformsPerObject), + .debugName = "Buffer: uniforms (per object)"}, + nullptr)); + } + + renderPassZPrepass_ = {.color = {{ + .loadOp = lvk::LoadOp_Clear, + .storeOp = kNumSamplesMSAA > 1 ? lvk::StoreOp_MsaaResolve : lvk::StoreOp_Store, + .clearColor = {0.0f, 0.0f, 0.0f, 1.0f}, + }}, + .depth = { + .loadOp = lvk::LoadOp_Clear, + .storeOp = lvk::StoreOp_Store, + .clearDepth = 1.0f, + }}; + + renderPassOffscreen_ = {.color = {{ + .loadOp = lvk::LoadOp_Clear, + .storeOp = kNumSamplesMSAA > 1 ? 
lvk::StoreOp_MsaaResolve : lvk::StoreOp_Store, + .clearColor = {0.0f, 0.0f, 0.0f, 1.0f}, + }}, + .depth = { + .loadOp = lvk::LoadOp_Load, + .storeOp = lvk::StoreOp_DontCare, + }}; + + renderPassMain_ = { + .color = {{.loadOp = lvk::LoadOp_Clear, .storeOp = lvk::StoreOp_Store, .clearColor = {0.0f, 0.0f, 0.0f, 1.0f}}}, + }; + + fbMain_ = { + .color = {{.texture = ctx_->getCurrentSwapchainTexture()}}, + }; + + createOffscreenFramebuffer(); + createPipelines(); + + imgui_ = std::make_unique( + *ctx_, (folderThirdParty + "3D-Graphics-Rendering-Cookbook/data/OpenSans-Light.ttf").c_str(), float(height_) / 70.0f); + + if (!initModel()) { + return false; + } + + return true; +} + +void destroy() { + imgui_ = nullptr; + + vb0_ = nullptr; + ib0_ = nullptr; + sbMaterials_ = nullptr; + ubPerFrame_.clear(); + ubPerObject_.clear(); + smMeshVert_ = nullptr; + smMeshFrag_ = nullptr; + smMeshVertZPrepass_ = nullptr; + smMeshFragZPrepass_ = nullptr; + smFullscreenVert_ = nullptr; + smFullscreenFrag_ = nullptr; + renderPipelineState_Mesh_ = nullptr; + renderPipelineState_MeshZPrepass_ = nullptr; + renderPipelineState_Fullscreen_ = nullptr; + ctx_->destroy(fbMain_); + fbOffscreenColor_ = nullptr; + fbOffscreenDepth_ = nullptr; + fbOffscreenResolve_ = nullptr; + TLAS = nullptr; + BLAS = nullptr; + sbInstances_ = nullptr; + ctx_ = nullptr; +} + +bool loadAndCache(const char* cacheFileName) { + LVK_PROFILER_FUNCTION(); + + // load 3D model and cache it + LLOGL("Loading `exterior.obj`... 
It can take a while in debug builds...\n"); + + tinyobj::attrib_t attrib; + std::vector shapes; + std::vector materials; + + std::string warn; + std::string err; + + const bool ret = tinyobj::LoadObj(&attrib, + &shapes, + &materials, + &warn, + &err, + (folderContentRoot + "src/bistro/Exterior/exterior.obj").c_str(), + (folderContentRoot + "src/bistro/Exterior/").c_str()); + + if (!LVK_VERIFY(ret)) { + LVK_ASSERT_MSG(ret, "Did you read the tutorial at the top of this file?"); + return false; + } + + // loop over shapes as described in https://github.com/tinyobjloader/tinyobjloader + for (size_t s = 0; s < shapes.size(); s++) { + size_t index_offset = 0; + for (size_t f = 0; f < shapes[s].mesh.num_face_vertices.size(); f++) { + LVK_ASSERT(shapes[s].mesh.num_face_vertices[f] == 3); + + for (size_t v = 0; v < 3; v++) { + tinyobj::index_t idx = shapes[s].mesh.indices[index_offset + v]; + + const vec3 pos(attrib.vertices[3 * size_t(idx.vertex_index) + 0], + attrib.vertices[3 * size_t(idx.vertex_index) + 1], + attrib.vertices[3 * size_t(idx.vertex_index) + 2]); + + const bool hasNormal = (idx.normal_index >= 0); + + const vec3 normal = hasNormal ? vec3(attrib.normals[3 * size_t(idx.normal_index) + 0], + attrib.normals[3 * size_t(idx.normal_index) + 1], + attrib.normals[3 * size_t(idx.normal_index) + 2]) + : vec3(0, 0, 1); + + const bool hasUV = (idx.texcoord_index >= 0); + + const vec2 uv = + hasUV ? vec2(attrib.texcoords[2 * size_t(idx.texcoord_index) + 0], attrib.texcoords[2 * size_t(idx.texcoord_index) + 1]) + : vec2(0); + + const int mtlIndex = shapes[s].mesh.material_ids[f]; + + LVK_ASSERT(mtlIndex >= 0 && mtlIndex < materials.size()); + + vertexData_.push_back({ + .position = pos, + .uv = glm::packHalf2x16(uv), + .normal = packOctahedral16(normal), + .mtlIndex = (uint16_t)mtlIndex, + }); + } + index_offset += 3; + } + } + + // repack the mesh as described in https://github.com/zeux/meshoptimizer + { + // 1. 
Generate an index buffer + const size_t indexCount = vertexData_.size(); + std::vector remap(indexCount); + const size_t vertexCount = + meshopt_generateVertexRemap(remap.data(), nullptr, indexCount, vertexData_.data(), indexCount, sizeof(VertexData)); + // 2. Remap vertices + std::vector remappedVertices; + indexData_.resize(indexCount); + remappedVertices.resize(vertexCount); + meshopt_remapIndexBuffer(indexData_.data(), nullptr, indexCount, &remap[0]); + meshopt_remapVertexBuffer(remappedVertices.data(), vertexData_.data(), indexCount, sizeof(VertexData), remap.data()); + vertexData_ = remappedVertices; + // 3. Optimize for the GPU vertex cache reuse and overdraw + meshopt_optimizeVertexCache(indexData_.data(), indexData_.data(), indexCount, vertexCount); + meshopt_optimizeOverdraw( + indexData_.data(), indexData_.data(), indexCount, &vertexData_[0].position.x, vertexCount, sizeof(VertexData), 1.05f); + meshopt_optimizeVertexFetch(vertexData_.data(), indexData_.data(), indexCount, vertexData_.data(), vertexCount, sizeof(VertexData)); + } + + // loop over materials + for (auto& m : materials) { + CachedMaterial mtl; + mtl.ambient = vec3(m.ambient[0], m.ambient[1], m.ambient[2]); + mtl.diffuse = vec3(m.diffuse[0], m.diffuse[1], m.diffuse[2]); + LVK_ASSERT(m.name.length() < MAX_MATERIAL_NAME); + strcat(mtl.name, m.name.c_str()); + cachedMaterials_.push_back(mtl); + } + + LLOGL("Caching mesh...\n"); + + FILE* cacheFile = fopen(cacheFileName, "wb"); + if (!cacheFile) { + return false; + } + const uint32_t numMaterials = (uint32_t)cachedMaterials_.size(); + const uint32_t numVertices = (uint32_t)vertexData_.size(); + const uint32_t numIndices = (uint32_t)indexData_.size(); + fwrite(&kMeshCacheVersion, sizeof(kMeshCacheVersion), 1, cacheFile); + fwrite(&numMaterials, sizeof(numMaterials), 1, cacheFile); + fwrite(&numVertices, sizeof(numVertices), 1, cacheFile); + fwrite(&numIndices, sizeof(numIndices), 1, cacheFile); + fwrite(cachedMaterials_.data(), 
sizeof(CachedMaterial), numMaterials, cacheFile); + fwrite(vertexData_.data(), sizeof(VertexData), numVertices, cacheFile); + fwrite(indexData_.data(), sizeof(uint32_t), numIndices, cacheFile); + return fclose(cacheFile) == 0; +} + +bool loadFromCache(const char* cacheFileName) { + FILE* cacheFile = fopen(cacheFileName, "rb"); + SCOPE_EXIT { + if (cacheFile) { + fclose(cacheFile); + } + }; + if (!cacheFile) { + return false; + } +#define CHECK_READ(expected, read) \ + if ((read) != (expected)) { \ + return false; \ + } + uint32_t versionProbe = 0; + CHECK_READ(1, fread(&versionProbe, sizeof(versionProbe), 1, cacheFile)); + if (versionProbe != kMeshCacheVersion) { + LLOGL("Cache file has wrong version id\n"); + return false; + } + uint32_t numMaterials = 0; + uint32_t numVertices = 0; + uint32_t numIndices = 0; + CHECK_READ(1, fread(&numMaterials, sizeof(numMaterials), 1, cacheFile)); + CHECK_READ(1, fread(&numVertices, sizeof(numVertices), 1, cacheFile)); + CHECK_READ(1, fread(&numIndices, sizeof(numIndices), 1, cacheFile)); + cachedMaterials_.resize(numMaterials); + vertexData_.resize(numVertices); + indexData_.resize(numIndices); + CHECK_READ(numMaterials, fread(cachedMaterials_.data(), sizeof(CachedMaterial), numMaterials, cacheFile)); + CHECK_READ(numVertices, fread(vertexData_.data(), sizeof(VertexData), numVertices, cacheFile)); + CHECK_READ(numIndices, fread(indexData_.data(), sizeof(uint32_t), numIndices, cacheFile)); +#undef CHECK_READ + return true; +} + +bool initModel() { + const std::string cacheFileName = folderContentRoot + "cache2.data"; + + if (!loadFromCache(cacheFileName.c_str())) { + if (!LVK_VERIFY(loadAndCache(cacheFileName.c_str()))) { + LVK_ASSERT_MSG(false, "Cannot load 3D model"); + return false; + } + } + + for (const auto& mtl : cachedMaterials_) { + materials_.push_back(GPUMaterial{vec4(mtl.ambient, 1.0f), vec4(mtl.diffuse, 1.0f)}); + } + sbMaterials_ = ctx_->createBuffer({.usage = lvk::BufferUsageBits_Storage, + .storage = 
lvk::StorageType_Device, + .size = sizeof(GPUMaterial) * materials_.size(), + .data = materials_.data(), + .debugName = "Buffer: materials"}, + nullptr); + + vb0_ = ctx_->createBuffer({.usage = lvk::BufferUsageBits_Vertex | lvk::BufferUsageBits_AccelStructBuildInputReadOnly, + .storage = lvk::StorageType_Device, + .size = sizeof(VertexData) * vertexData_.size(), + .data = vertexData_.data(), + .debugName = "Buffer: vertex"}, + nullptr); + ib0_ = ctx_->createBuffer({.usage = lvk::BufferUsageBits_Index | lvk::BufferUsageBits_AccelStructBuildInputReadOnly, + .storage = lvk::StorageType_Device, + .size = sizeof(uint32_t) * indexData_.size(), + .data = indexData_.data(), + .debugName = "Buffer: index"}, + nullptr); + + const glm::mat3x4 transformMatrix(1.0f); + + lvk::Holder transformBuffer = ctx_->createBuffer({ + .usage = lvk::BufferUsageBits_AccelStructBuildInputReadOnly, + .storage = lvk::StorageType_HostVisible, + .size = sizeof(glm::mat3x4), + .data = &transformMatrix, + }); + + BLAS = ctx_->createAccelerationStructure({ + .type = lvk::AccelStructType_BLAS, + .geometryType = lvk::AccelStructGeomType_Triangles, + .vertexFormat = lvk::VertexFormat::Float3, + .vertexBuffer = vb0_, + .vertexStride = sizeof(VertexData), + .numVertices = (uint32_t)vertexData_.size(), + .indexFormat = lvk::IndexFormat_UI32, + .indexBuffer = ib0_, + .transformBuffer = transformBuffer, + .buildRange = {.primitiveCount = (uint32_t)indexData_.size() / 3}, + .buildFlags = lvk::AccelStructBuildFlagBits_PreferFastTrace, + .debugName = "BLAS", + }); + + const glm::mat3x4 transform(glm::scale(mat4(1.0f), vec3(0.05f))); + + const lvk::AccelStructInstance instance{ + // clang-format off + .transform = (const lvk::mat3x4&)transform, + // clang-format on + .instanceCustomIndex = 0, + .mask = 0xff, + .instanceShaderBindingTableRecordOffset = 0, + .flags = lvk::AccelStructInstanceFlagBits_TriangleFacingCullDisable, + .accelerationStructureReference = ctx_->gpuAddress(BLAS), + }; + + // Buffer for 
instance data + sbInstances_ = ctx_->createBuffer(lvk::BufferDesc{ + .usage = lvk::BufferUsageBits_AccelStructBuildInputReadOnly, + .storage = lvk::StorageType_HostVisible, + .size = sizeof(VkAccelerationStructureInstanceKHR), + .data = &instance, + .debugName = "sbInstances_", + }); + + TLAS = ctx_->createAccelerationStructure({ + .type = lvk::AccelStructType_TLAS, + .geometryType = lvk::AccelStructGeomType_Instances, + .instancesBuffer = sbInstances_, + .buildRange = {.primitiveCount = 1}, + .buildFlags = lvk::AccelStructBuildFlagBits_PreferFastTrace, + }); + + return true; +} + +void createPipelines() { + if (renderPipelineState_Mesh_.valid()) { + return; + } + + smMeshVert_ = ctx_->createShaderModule({kCodeVS, lvk::Stage_Vert, "Shader Module: main (vert)"}); + smMeshFrag_ = ctx_->createShaderModule({kCodeFS, lvk::Stage_Frag, "Shader Module: main (frag)"}); + smMeshVertZPrepass_ = ctx_->createShaderModule({kCodeZPrepassVS, lvk::Stage_Vert, "Shader Module: main zprepass (vert)"}); + smMeshFragZPrepass_ = ctx_->createShaderModule({kCodeZPrepassFS, lvk::Stage_Frag, "Shader Module: main zprepass (frag)"}); + smFullscreenVert_ = ctx_->createShaderModule({kCodeFullscreenVS, lvk::Stage_Vert, "Shader Module: fullscreen (vert)"}); + smFullscreenFrag_ = ctx_->createShaderModule({kCodeFullscreenFS, lvk::Stage_Frag, "Shader Module: fullscreen (frag)"}); + + { + const lvk::VertexInput vdesc = { + .attributes = + { + {.location = 0, .format = lvk::VertexFormat::Float3, .offset = offsetof(VertexData, position)}, + {.location = 1, .format = lvk::VertexFormat::HalfFloat2, .offset = offsetof(VertexData, uv)}, + {.location = 2, .format = lvk::VertexFormat::UShort1, .offset = offsetof(VertexData, normal)}, + {.location = 3, .format = lvk::VertexFormat::UShort1, .offset = offsetof(VertexData, mtlIndex)}, + }, + .inputBindings = {{.stride = sizeof(VertexData)}}, + }; + + lvk::RenderPipelineDesc desc = { + .vertexInput = vdesc, + .smVert = smMeshVert_, + .smFrag = smMeshFrag_, + 
.color = {{.format = ctx_->getFormat(fbOffscreen_.color[0].texture)}}, + .depthFormat = ctx_->getFormat(fbOffscreen_.depthStencil.texture), + .cullMode = lvk::CullMode_Back, + .frontFaceWinding = lvk::WindingMode_CCW, + .samplesCount = kNumSamplesMSAA, + .debugName = "Pipeline: mesh", + }; + + renderPipelineState_Mesh_ = ctx_->createRenderPipeline(desc, nullptr); + } + { + const lvk::VertexInput vdesc = { + .attributes = + { + {.location = 0, .format = lvk::VertexFormat::Float3, .offset = offsetof(VertexData, position)}, + {.location = 3, .format = lvk::VertexFormat::UShort1, .offset = offsetof(VertexData, mtlIndex)}, + }, + .inputBindings = {{.stride = sizeof(VertexData)}}, + }; + lvk::RenderPipelineDesc desc = { + .vertexInput = vdesc, + .smVert = smMeshVertZPrepass_, + .smFrag = smMeshFragZPrepass_, + .color = {{.format = ctx_->getFormat(fbOffscreen_.color[0].texture)}}, + .depthFormat = ctx_->getFormat(fbOffscreen_.depthStencil.texture), + .cullMode = lvk::CullMode_Back, + .frontFaceWinding = lvk::WindingMode_CCW, + .samplesCount = kNumSamplesMSAA, + .debugName = "Pipeline: mesh z-prepass", + }; + + renderPipelineState_MeshZPrepass_ = ctx_->createRenderPipeline(desc, nullptr); + } + + // fullscreen + { + const lvk::RenderPipelineDesc desc = { + .smVert = smFullscreenVert_, + .smFrag = smFullscreenFrag_, + .color = {{.format = ctx_->getFormat(fbMain_.color[0].texture)}}, + .depthFormat = ctx_->getFormat(fbMain_.depthStencil.texture), + .cullMode = lvk::CullMode_None, + .debugName = "Pipeline: fullscreen", + }; + renderPipelineState_Fullscreen_ = ctx_->createRenderPipeline(desc, nullptr); + } +} + +void createOffscreenFramebuffer() { + const uint32_t w = width_; + const uint32_t h = height_; + lvk::TextureDesc descDepth = { + .type = lvk::TextureType_2D, + .format = lvk::Format_Z_UN24, + .dimensions = {w, h}, + .usage = lvk::TextureUsageBits_Attachment | lvk::TextureUsageBits_Sampled, + .numMipLevels = lvk::calcNumMipLevels(w, h), + .debugName = "Offscreen 
framebuffer (d)", + }; + if (kNumSamplesMSAA > 1) { + descDepth.usage = lvk::TextureUsageBits_Attachment; + descDepth.numSamples = kNumSamplesMSAA; + descDepth.numMipLevels = 1; + } + + const uint8_t usage = lvk::TextureUsageBits_Attachment | lvk::TextureUsageBits_Sampled | lvk::TextureUsageBits_Storage; + const lvk::Format format = lvk::Format_RGBA_UN8; + + lvk::TextureDesc descColor = { + .type = lvk::TextureType_2D, + .format = format, + .dimensions = {w, h}, + .usage = usage, + .numMipLevels = lvk::calcNumMipLevels(w, h), + .debugName = "Offscreen framebuffer (color)", + }; + if (kNumSamplesMSAA > 1) { + descColor.usage = lvk::TextureUsageBits_Attachment; + descColor.numSamples = kNumSamplesMSAA; + descColor.numMipLevels = 1; + } + + fbOffscreenColor_ = ctx_->createTexture(descColor); + fbOffscreenDepth_ = ctx_->createTexture(descDepth); + lvk::Framebuffer fb = { + .color = {{.texture = fbOffscreenColor_}}, + .depthStencil = {.texture = fbOffscreenDepth_}, + }; + + if (kNumSamplesMSAA > 1) { + fbOffscreenResolve_ = ctx_->createTexture({.type = lvk::TextureType_2D, + .format = format, + .dimensions = {w, h}, + .usage = usage, + .debugName = "Offscreen framebuffer (color resolve)"}); + fb.color[0].resolveTexture = fbOffscreenResolve_; + } + + fbOffscreen_ = fb; +} + +void resize() { + if (!width_ || !height_) { + return; + } + ctx_->recreateSwapchain(width_, height_); + createOffscreenFramebuffer(); +} + +void render(double delta, uint32_t frameIndex) { + LVK_PROFILER_FUNCTION(); + + if (!width_ && !height_) + return; + + lvk::TextureHandle nativeDrawable = ctx_->getCurrentSwapchainTexture(); + fbMain_.color[0].texture = nativeDrawable; + + // imGui + { + imgui_->beginFrame(fbMain_); + + auto imGuiPushFlagsAndStyles = [](bool value) { + ImGui::BeginDisabled(!value); + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * (value ? 
1.0f : 0.3f)); + }; + auto imGuiPopFlagsAndStyles = []() { + ImGui::PopStyleVar(); + ImGui::EndDisabled(); + }; + + const float indentSize = 16.0f; + ImGui::Begin("Keyboard hints:", nullptr, ImGuiWindowFlags_AlwaysAutoResize); + ImGui::Text("W/S/A/D - camera movement"); + ImGui::Text("1/2 - camera up/down"); + ImGui::Text("Shift - fast movement"); + ImGui::Separator(); + ImGui::Checkbox("Ray traced shadows", &enableShadows_); + ImGui::Indent(indentSize); + imGuiPushFlagsAndStyles(enableShadows_); + ImGui::SliderFloat3("Light dir", glm::value_ptr(lightDir_), -1, 1); + imGuiPopFlagsAndStyles(); + lightDir_ = glm::normalize(lightDir_); + ImGui::Unindent(indentSize); + ImGui::Checkbox("Ray traced AO:", &enableAO_); + ImGui::Indent(indentSize); + imGuiPushFlagsAndStyles(enableAO_); + ImGui::Checkbox("Distance based AO", &aoDistanceBased_); + ImGui::SliderFloat("AO radius", &aoRadius_, 0.5f, 16.0f); + ImGui::SliderFloat("AO power", &aoPower_, 1.0f, 2.0f); + ImGui::SliderInt("AO samples", &aoSamples_, 1, 32); + ImGui::Unindent(indentSize); + imGuiPopFlagsAndStyles(); + + ImGui::End(); + + // a nice FPS counter + { + const ImGuiWindowFlags flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoMove; + const ImGuiViewport* v = ImGui::GetMainViewport(); + LVK_ASSERT(v); + ImGui::SetNextWindowPos( + { + v->WorkPos.x + v->WorkSize.x - 15.0f, + v->WorkPos.y + 15.0f, + }, + ImGuiCond_Always, + {1.0f, 0.0f}); + ImGui::SetNextWindowBgAlpha(0.30f); + ImGui::SetNextWindowSize(ImVec2(ImGui::CalcTextSize("FPS : _______").x, 0)); + if (ImGui::Begin("##FPS", nullptr, flags)) { + ImGui::Text("FPS : %i", (int)fps_.getFPS()); + ImGui::Text("Ms : %.1f", 1000.0 / fps_.getFPS()); + } + ImGui::End(); + } + } + + positioner_.update(delta, mousePos_, mousePressed_); + + const float fov = float(45.0f * (M_PI / 180.0f)); + const float aspectRatio = 
(float)width_ / (float)height_; + + perFrame_ = UniformsPerFrame{ + .proj = glm::perspective(fov, aspectRatio, 0.5f, 500.0f), + .view = camera_.getViewMatrix(), + }; + ctx_->upload(ubPerFrame_[frameIndex], &perFrame_, sizeof(perFrame_)); + + UniformsPerObject perObject; + + perObject.model = glm::scale(mat4(1.0f), vec3(0.05f)); + + ctx_->upload(ubPerObject_[frameIndex], &perObject, sizeof(perObject)); + + lvk::ICommandBuffer& buffer = ctx_->acquireCommandBuffer(); + + buffer.cmdBindVertexBuffer(0, vb0_, 0); + buffer.cmdBindIndexBuffer(ib0_, lvk::IndexFormat_UI32); + + // Pass 1: mesh z-prepass + { + buffer.cmdBeginRendering(renderPassZPrepass_, fbOffscreen_); + buffer.cmdPushDebugGroupLabel("Render Mesh ZPrepass", 0xff0000ff); + buffer.cmdBindRenderPipeline(renderPipelineState_MeshZPrepass_); + struct { + uint64_t perFrame; + uint64_t perObject; + uint64_t materials; + } pc = { + .perFrame = ctx_->gpuAddress(ubPerFrame_[frameIndex]), + .perObject = ctx_->gpuAddress(ubPerObject_[frameIndex]), + .materials = ctx_->gpuAddress(sbMaterials_), + }; + buffer.cmdPushConstants(pc); + buffer.cmdBindDepthState({.compareOp = lvk::CompareOp_Less, .isDepthWriteEnabled = true}); + buffer.cmdDrawIndexed(static_cast(indexData_.size())); + buffer.cmdPopDebugGroupLabel(); + buffer.cmdEndRendering(); + } + // Pass 2: mesh with RTX + { + buffer.cmdBeginRendering(renderPassOffscreen_, fbOffscreen_); + buffer.cmdPushDebugGroupLabel("Render Mesh", 0xff0000ff); + buffer.cmdBindRenderPipeline(renderPipelineState_Mesh_); + struct { + vec4 lightDir; + uint64_t perFrame; + uint64_t perObject; + uint64_t materials; + uint32_t tlas; + int enableShadows; + int enableAO; + int aoDistanceBased; + int aoSamples; + float aoRadius; + float aoPower; + } pc = { + .lightDir = vec4(lightDir_, 1.0), + .perFrame = ctx_->gpuAddress(ubPerFrame_[frameIndex]), + .perObject = ctx_->gpuAddress(ubPerObject_[frameIndex]), + .materials = ctx_->gpuAddress(sbMaterials_), + .tlas = TLAS.index(), + .enableShadows = 
enableShadows_ ? 1 : 0,
+        .enableAO = enableAO_ ? 1 : 0,
+        .aoDistanceBased = aoDistanceBased_ ? 1 : 0,
+        .aoSamples = aoSamples_,
+        .aoRadius = aoRadius_,
+        .aoPower = aoPower_,
+    };
+    buffer.cmdPushConstants(pc);
+    buffer.cmdBindDepthState({.compareOp = lvk::CompareOp_Equal, .isDepthWriteEnabled = false});
+    buffer.cmdDrawIndexed(static_cast(indexData_.size()));
+    buffer.cmdPopDebugGroupLabel();
+    buffer.cmdEndRendering();
+  }
+
+  ctx_->submit(buffer);
+
+  // Pass 3: render into the swapchain image
+  {
+    lvk::ICommandBuffer& buffer = ctx_->acquireCommandBuffer();
+
+    lvk::TextureHandle tex = kNumSamplesMSAA > 1 ? fbOffscreen_.color[0].resolveTexture : fbOffscreen_.color[0].texture;
+
+    // This will clear the framebuffer
+    buffer.cmdBeginRendering(renderPassMain_, fbMain_, {.textures = {tex}});
+    {
+      buffer.cmdBindRenderPipeline(renderPipelineState_Fullscreen_);
+      buffer.cmdPushDebugGroupLabel("Swapchain Output", 0xff0000ff);
+      buffer.cmdBindDepthState({});
+      struct {
+        uint32_t texture;
+      } bindings = {
+          .texture = tex.index(),
+      };
+      buffer.cmdPushConstants(bindings);
+      buffer.cmdDraw(3);
+      buffer.cmdPopDebugGroupLabel();
+
+      imgui_->endFrame(buffer);
+    }
+    buffer.cmdEndRendering();
+
+    ctx_->submit(buffer, fbMain_.color[0].texture);
+  }
+}
+
+// Copies the four components of a vec4 into an ImVec4, in x/y/z/w order.
+inline ImVec4 toVec4(const vec4& c) {
+  return ImVec4(c.x, c.y, c.z, c.w);
+}
+
+// Application entry point.
+//
+// Steps, in order:
+//   1. initialize logging and locate `third-party/content/` in the current directory or one of its parents;
+//   2. create the GLFW window and a Vulkan context with the acceleration-structure and ray-query
+//      device extensions/features enabled;
+//   3. call init(), then install the GLFW resize/mouse/scroll/keyboard callbacks;
+//   4. run the frame loop until the window is asked to close, then call destroy() and shut GLFW down.
+//
+// Returns EXIT_FAILURE if the content folder, the window, the Vulkan context or init() fails; 0 otherwise.
+int main(int argc, char* argv[]) {
+  minilog::initialize(nullptr, {.threadNames = false});
+
+  // find the content folder
+  {
+    using namespace std::filesystem;
+    path subdir("third-party/content/");
+    path dir = current_path();
+    // find the content somewhere above our current build directory
+    while (dir != current_path().root_path() && !exists(dir / subdir)) {
+      dir = dir.parent_path();
+    }
+    if (!exists(dir / subdir)) {
+      // terminate the line so the message is not left sitting in the stdout buffer when the assert below fires
+      printf("Cannot find the content directory. Run `deploy_content.py` before running this app.\n");
+      LVK_ASSERT(false);
+      return EXIT_FAILURE;
+    }
+    folderThirdParty = (dir / path("third-party/deps/src/")).string();
+    folderContentRoot = (dir / subdir).string();
+  }
+
+  // device features requested for ray queries; chained via pNext and handed to the context below
+  VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR,
+      .accelerationStructure = VK_TRUE,
+      .accelerationStructureCaptureReplay = VK_FALSE,
+      .accelerationStructureIndirectBuild = VK_FALSE,
+      .accelerationStructureHostCommands = VK_FALSE,
+      .descriptorBindingAccelerationStructureUpdateAfterBind = VK_TRUE,
+  };
+  VkPhysicalDeviceRayQueryFeaturesKHR rayQueryFeatures = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,
+      .pNext = &accelerationStructureFeatures,
+      .rayQuery = VK_TRUE,
+  };
+
+  GLFWwindow* window = lvk::initWindow("Vulkan Bistro", width_, height_);
+  if (!window) {
+    // every GLFW call below needs a valid window
+    return EXIT_FAILURE;
+  }
+  ctx_ = lvk::createVulkanContextWithSwapchain(window,
+                                               width_,
+                                               height_,
+                                               {
+                                                   .enableValidation = kEnableValidationLayers,
+                                                   .extensionsDevice =
+                                                       {
+                                                           "VK_KHR_acceleration_structure",
+                                                           "VK_KHR_deferred_host_operations",
+                                                           "VK_KHR_pipeline_library",
+                                                           "VK_KHR_ray_query",
+                                                       },
+                                                   .extensionsDeviceFeatures = &rayQueryFeatures,
+                                               },
+                                               lvk::HWDeviceType_Discrete);
+  if (!ctx_) {
+    // no context was created, so the window can be released right away
+    glfwDestroyWindow(window);
+    glfwTerminate();
+    return EXIT_FAILURE;
+  }
+
+  if (!init()) {
+    return EXIT_FAILURE;
+  }
+
+  glfwSetFramebufferSizeCallback(window, [](GLFWwindow*, int width, int height) {
+    width_ = width;
+    height_ = height;
+    resize();
+  });
+
+  glfwSetCursorPosCallback(window, [](auto* window, double x, double y) {
+    int width, height;
+    glfwGetFramebufferSize(window, &width, &height);
+    if (width && height) {
+      // camera input uses normalized coordinates with a flipped Y axis; ImGui gets the raw cursor position
+      mousePos_ = vec2(x / width, 1.0f - y / height);
+      ImGui::GetIO().MousePos = ImVec2((float)x, (float)y);
+    }
+  });
+
+  glfwSetMouseButtonCallback(window, [](auto* window, int button, int action, int mods) {
+    if (!ImGui::GetIO().WantCaptureMouse) {
+      if (button == GLFW_MOUSE_BUTTON_LEFT) {
+        mousePressed_ = (action == GLFW_PRESS);
+      }
+    } else {
+      // release the mouse
+      mousePressed_ = false;
+    }
+    double xpos, ypos;
+    glfwGetCursorPos(window, &xpos, &ypos);
+    const ImGuiMouseButton_ imguiButton = (button == GLFW_MOUSE_BUTTON_LEFT)
+                                              ? ImGuiMouseButton_Left
+                                              : (button == GLFW_MOUSE_BUTTON_RIGHT ? ImGuiMouseButton_Right : ImGuiMouseButton_Middle);
+    ImGuiIO& io = ImGui::GetIO();
+    io.MousePos = ImVec2((float)xpos, (float)ypos);
+    io.MouseDown[imguiButton] = action == GLFW_PRESS;
+  });
+
+  glfwSetScrollCallback(window, [](GLFWwindow* window, double dx, double dy) {
+    ImGuiIO& io = ImGui::GetIO();
+    io.MouseWheelH = (float)dx;
+    io.MouseWheel = (float)dy;
+  });
+
+  glfwSetKeyCallback(window, [](GLFWwindow* window, int key, int, int action, int mods) {
+    // GLFW_PRESS and GLFW_REPEAT both count as "pressed"
+    const bool pressed = action != GLFW_RELEASE;
+    if (key == GLFW_KEY_ESCAPE && pressed) {
+      glfwSetWindowShouldClose(window, GLFW_TRUE);
+    }
+    if (key == GLFW_KEY_W) {
+      positioner_.movement_.forward_ = pressed;
+    }
+    if (key == GLFW_KEY_S) {
+      positioner_.movement_.backward_ = pressed;
+    }
+    if (key == GLFW_KEY_A) {
+      positioner_.movement_.left_ = pressed;
+    }
+    if (key == GLFW_KEY_D) {
+      positioner_.movement_.right_ = pressed;
+    }
+    if (key == GLFW_KEY_1) {
+      positioner_.movement_.up_ = pressed;
+    }
+    if (key == GLFW_KEY_2) {
+      positioner_.movement_.down_ = pressed;
+    }
+    // The Shift keys themselves switch fast movement on and off. For any other key, a press with Shift
+    // held may enable it, but a release must not disable it while Shift is still down.
+    if (key == GLFW_KEY_LEFT_SHIFT || key == GLFW_KEY_RIGHT_SHIFT) {
+      positioner_.movement_.fastSpeed_ = pressed;
+    } else if (pressed && (mods & GLFW_MOD_SHIFT)) {
+      positioner_.movement_.fastSpeed_ = true;
+    }
+    if (key == GLFW_KEY_SPACE) {
+      positioner_.setUpVector(vec3(0.0f, 1.0f, 0.0f));
+    }
+  });
+
+  double prevTime = glfwGetTime();
+  uint32_t frameIndex = 0;
+
+  // Main loop
+  while (!glfwWindowShouldClose(window)) {
+    glfwPollEvents();
+
+    const double newTime = glfwGetTime();
+    const double delta = newTime - prevTime;
+    prevTime = newTime;
+
+    // skip the frame while the framebuffer has a zero dimension
+    if (!width_ || !height_)
+      continue;
+
+    fps_.tick(delta);
+
+    render(delta, frameIndex);
+
+    frameIndex = (frameIndex + 1) % kNumBufferedFrames;
+  }
+
+  // destroy all the Vulkan stuff before closing the window
+  destroy();
+
+  glfwDestroyWindow(window);
+  glfwTerminate();
+
+  return 0;
+}
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 97cf743d4a..62b5a8a6cb 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -128,6 +128,7 @@ ADD_DEMO("004_YUV")
 if(WIN32 OR UNIX AND NOT (ANDROID OR APPLE))
   ADD_DEMO("005_MeshShaders")
   ADD_DEMO("006_RayTracingHello")
+  ADD_DEMO("007_RayTracingAO")
 endif()
 
 ADD_DEMO("Tiny")