diff --git a/graphics/deko3d/deko_examples/Makefile b/graphics/deko3d/deko_examples/Makefile
new file mode 100644
index 0000000..d237ed2
--- /dev/null
+++ b/graphics/deko3d/deko_examples/Makefile
@@ -0,0 +1,284 @@
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+
+# Remove a half-written target (e.g. a .dksh left behind by a failed uam run)
+# so it is not mistaken for an up-to-date file on the next invocation.
+.DELETE_ON_ERROR:
+
+ifeq ($(strip $(DEVKITPRO)),)
+$(error "Please set DEVKITPRO in your environment. export DEVKITPRO=<path to>/devkitpro")
+endif
+
+TOPDIR ?= $(CURDIR)
+include $(DEVKITPRO)/libnx/switch_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+# ROMFS is the directory containing data to be added to RomFS, relative to the Makefile (Optional)
+#
+# NO_ICON: if set to anything, do not use icon.
+# NO_NACP: if set to anything, no .nacp file is generated.
+# APP_TITLE is the name of the app stored in the .nacp file (Optional)
+# APP_AUTHOR is the author of the app stored in the .nacp file (Optional)
+# APP_VERSION is the version of the app stored in the .nacp file (Optional)
+# APP_TITLEID is the titleID of the app stored in the .nacp file (Optional)
+# ICON is the filename of the icon (.jpg), relative to the project folder.
+#   If not set, it attempts to use one of the following (in this order):
+#     - <Project name>.jpg
+#     - icon.jpg
+#     - <libnx folder>/default_icon.jpg
+#
+# CONFIG_JSON is the filename of the NPDM config file (.json), relative to the project folder.
+#   If not set, it attempts to use one of the following (in this order):
+#     - <Project name>.json
+#     - config.json
+#   If a JSON file is provided or autodetected, an ExeFS PFS0 (.nsp) is built instead
+#   of a homebrew executable (.nro). This is intended to be used for sysmodules.
+#   NACP building is skipped as well.
+#---------------------------------------------------------------------------------
+TARGET      := $(notdir $(CURDIR))
+BUILD       := build
+SOURCES     := source source/SampleFramework
+DATA        := data
+INCLUDES    := include
+ROMFS       := romfs
+
+# Output folders for autogenerated files in romfs
+OUT_SHADERS := shaders
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH := -march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
+
+CFLAGS := -g -Wall -O2 -ffunction-sections \
+          $(ARCH) $(DEFINES)
+
+CFLAGS += $(INCLUDE) -D__SWITCH__
+
+CXXFLAGS := $(CFLAGS) -std=gnu++17 -fno-exceptions -fno-rtti
+
+ASFLAGS := -g $(ARCH)
+LDFLAGS = -specs=$(DEVKITPRO)/libnx/switch.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
+
+# NOTE(review): deko3dd looks like the debug build of deko3d (the release
+# library would be -ldeko3d) - confirm which one is intended before shipping.
+LIBS := -ldeko3dd -lnx
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS := $(PORTLIBS) $(LIBNX)
+
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+export OUTPUT := $(CURDIR)/$(TARGET)
+export TOPDIR := $(CURDIR)
+
+export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+                $(foreach dir,$(DATA),$(CURDIR)/$(dir))
+
+export DEPSDIR := $(CURDIR)/$(BUILD)
+
+CFILES    := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CPPFILES  := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES    := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+GLSLFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.glsl)))
+BINFILES  := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CPPFILES)),)
+#---------------------------------------------------------------------------------
+  export LD := $(CC)
+#---------------------------------------------------------------------------------
+else
+#---------------------------------------------------------------------------------
+  export LD := $(CXX)
+#---------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------
+
+export OFILES_BIN := $(addsuffix .o,$(BINFILES))
+export OFILES_SRC := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
+export OFILES     := $(OFILES_BIN) $(OFILES_SRC)
+export HFILES_BIN := $(addsuffix .h,$(subst .,_,$(BINFILES)))
+
+ifneq ($(strip $(ROMFS)),)
+  ROMFS_TARGETS :=
+  ROMFS_FOLDERS :=
+  ifneq ($(strip $(OUT_SHADERS)),)
+    ROMFS_SHADERS := $(ROMFS)/$(OUT_SHADERS)
+    ROMFS_TARGETS += $(patsubst %.glsl,$(ROMFS_SHADERS)/%.dksh,$(GLSLFILES))
+    ROMFS_FOLDERS += $(ROMFS_SHADERS)
+  endif
+
+  export ROMFS_DEPS := $(foreach file,$(ROMFS_TARGETS),$(CURDIR)/$(file))
+endif
+
+export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+                  $(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+                  -I$(CURDIR)/$(BUILD)
+
+export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+
+# Auto-detect the NPDM config. $(filter) matches whole file names, so a file
+# such as "myconfig.json" no longer selects a config.json that does not exist.
+ifeq ($(strip $(CONFIG_JSON)),)
+  jsons := $(wildcard *.json)
+  ifneq (,$(filter $(TARGET).json,$(jsons)))
+    export APP_JSON := $(TOPDIR)/$(TARGET).json
+  else
+    ifneq (,$(filter config.json,$(jsons)))
+      export APP_JSON := $(TOPDIR)/config.json
+    endif
+  endif
+else
+  export APP_JSON := $(TOPDIR)/$(CONFIG_JSON)
+endif
+
+# Auto-detect the icon using the same whole-name match.
+ifeq ($(strip $(ICON)),)
+  icons := $(wildcard *.jpg)
+  ifneq (,$(filter $(TARGET).jpg,$(icons)))
+    export APP_ICON := $(TOPDIR)/$(TARGET).jpg
+  else
+    ifneq (,$(filter icon.jpg,$(icons)))
+      export APP_ICON := $(TOPDIR)/icon.jpg
+    endif
+  endif
+else
+  export APP_ICON := $(TOPDIR)/$(ICON)
+endif
+
+ifeq ($(strip $(NO_ICON)),)
+  export NROFLAGS += --icon=$(APP_ICON)
+endif
+
+ifeq ($(strip $(NO_NACP)),)
+  export NROFLAGS += --nacp=$(CURDIR)/$(TARGET).nacp
+endif
+
+ifneq ($(strip $(APP_TITLEID)),)
+  export NACPFLAGS += --titleid=$(APP_TITLEID)
+endif
+
+ifneq ($(strip $(ROMFS)),)
+  export NROFLAGS += --romfsdir=$(CURDIR)/$(ROMFS)
+endif
+
+.PHONY: all clean
+
+#---------------------------------------------------------------------------------
+all: $(ROMFS_TARGETS) | $(BUILD)
+	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+$(BUILD):
+	@mkdir -p $@
+
+ifneq ($(strip $(ROMFS_TARGETS)),)
+
+$(ROMFS_TARGETS): | $(ROMFS_FOLDERS)
+
+$(ROMFS_FOLDERS):
+	@mkdir -p $@
+
+# Compile GLSL sources (located through VPATH) into .dksh files with uam.
+# The shader stage is chosen from the file name suffix: _vsh, _tcsh, _tesh,
+# _gsh and _fsh; any other .glsl file falls through to the last rule and is
+# compiled as a compute shader.
+$(ROMFS_SHADERS)/%_vsh.dksh: %_vsh.glsl
+	@echo {vert} $(notdir $<)
+	@uam -s vert -o $@ $<
+
+$(ROMFS_SHADERS)/%_tcsh.dksh: %_tcsh.glsl
+	@echo {tess_ctrl} $(notdir $<)
+	@uam -s tess_ctrl -o $@ $<
+
+$(ROMFS_SHADERS)/%_tesh.dksh: %_tesh.glsl
+	@echo {tess_eval} $(notdir $<)
+	@uam -s tess_eval -o $@ $<
+
+$(ROMFS_SHADERS)/%_gsh.dksh: %_gsh.glsl
+	@echo {geom} $(notdir $<)
+	@uam -s geom -o $@ $<
+
+$(ROMFS_SHADERS)/%_fsh.dksh: %_fsh.glsl
+	@echo {frag} $(notdir $<)
+	@uam -s frag -o $@ $<
+
+$(ROMFS_SHADERS)/%.dksh: %.glsl
+	@echo {comp} $(notdir $<)
+	@uam -s comp -o $@ $<
+
+endif
+
+#---------------------------------------------------------------------------------
+clean:
+	@echo clean ...
+ifeq ($(strip $(APP_JSON)),)
+	@rm -fr $(BUILD) $(ROMFS_FOLDERS) $(TARGET).nro $(TARGET).nacp $(TARGET).elf
+else
+	@rm -fr $(BUILD) $(ROMFS_FOLDERS) $(TARGET).nsp $(TARGET).nso $(TARGET).npdm $(TARGET).elf
+endif
+
+
+#---------------------------------------------------------------------------------
+else
+.PHONY: all
+
+DEPENDS := $(OFILES:.o=.d)
+
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(APP_JSON)),)
+
+all : $(OUTPUT).nro
+
+ifeq ($(strip $(NO_NACP)),)
+$(OUTPUT).nro : $(OUTPUT).elf $(OUTPUT).nacp $(ROMFS_DEPS)
+else
+$(OUTPUT).nro : $(OUTPUT).elf $(ROMFS_DEPS)
+endif
+
+else
+
+all : $(OUTPUT).nsp
+
+$(OUTPUT).nsp : $(OUTPUT).nso $(OUTPUT).npdm
+
+$(OUTPUT).nso : $(OUTPUT).elf
+
+endif
+
+$(OUTPUT).elf : $(OFILES)
+
+$(OFILES_SRC) : $(HFILES_BIN)
+
+#---------------------------------------------------------------------------------
+# you need a rule like this for each extension you use as binary data
+#---------------------------------------------------------------------------------
+%.bin.o %_bin.h : %.bin
+#---------------------------------------------------------------------------------
+	@echo $(notdir $<)
+	@$(bin2o)
+
+-include $(DEPENDS)
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
diff --git a/graphics/deko3d/deko_examples/romfs/cat-256x256.bc1 b/graphics/deko3d/deko_examples/romfs/cat-256x256.bc1
new file mode 100644
index 0000000..b89708a
Binary files /dev/null and b/graphics/deko3d/deko_examples/romfs/cat-256x256.bc1 differ
diff --git a/graphics/deko3d/deko_examples/romfs/teapot-idx.bin b/graphics/deko3d/deko_examples/romfs/teapot-idx.bin
new file mode 100644
index 0000000..8b34620
Binary files /dev/null and
b/graphics/deko3d/deko_examples/romfs/teapot-idx.bin differ diff --git a/graphics/deko3d/deko_examples/romfs/teapot-vtx.bin b/graphics/deko3d/deko_examples/romfs/teapot-vtx.bin new file mode 100644 index 0000000..c8cc429 Binary files /dev/null and b/graphics/deko3d/deko_examples/romfs/teapot-vtx.bin differ diff --git a/graphics/deko3d/deko_examples/source/Example01_SimpleSetup.cpp b/graphics/deko3d/deko_examples/source/Example01_SimpleSetup.cpp new file mode 100644 index 0000000..904dcb3 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example01_SimpleSetup.cpp @@ -0,0 +1,176 @@ +/* +** deko3d Example 01: Simple Setup +** This example shows how to setup deko3d for rendering scenes with the GPU. +** New concepts in this example: +** - Creating devices and queues +** - Basic memory management +** - Setting up framebuffers and swapchains +** - Recording a static command list with rendering commands +** - Acquiring and presenting images with the queue and swapchain +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" + +// C++ standard library headers +#include +#include + +class CExample01 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr uint32_t FramebufferWidth = 1280; + static constexpr uint32_t FramebufferHeight = 720; + static constexpr unsigned StaticCmdSize = 0x1000; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist; + +public: + CExample01() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory 
pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the framebuffer resources + createFramebufferResources(); + } + + ~CExample01() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + } + + void createFramebufferResources() + { + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(FramebufferWidth, FramebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }; + cmdbuf.bindRenderTargets(&colorTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + } + + void destroyFramebufferResources() + { + // Return 
early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + } + + void recordStaticCommands() + { + // Calculate several measurements for the scene + unsigned HalfWidth = FramebufferWidth/2, HalfHeight = FramebufferHeight/2; + unsigned BoxSize = 400; + unsigned BoxX = HalfWidth - BoxSize/2, BoxY = HalfHeight - BoxSize/2; + unsigned TileWidth = BoxSize/5, TileHeight = BoxSize/4; + + // Draw a scene using only scissors and clear colors + cmdbuf.setScissors(0, { { 0, 0, FramebufferWidth, FramebufferHeight } }); + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.25f, 0.0f, 1.0f); + cmdbuf.setScissors(0, { { BoxX, BoxY, BoxSize, BoxSize } }); + cmdbuf.clearColor(0, DkColorMask_RGBA, 229/255.0f, 1.0f, 232/255.0f, 1.0f); + cmdbuf.setScissors(0, { { BoxX + 2*TileWidth, BoxY + 1*TileHeight, 1*TileWidth, 1*TileHeight } }); + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.5f, 0.0f, 1.0f); + cmdbuf.setScissors(0, { { BoxX + 1*TileWidth, BoxY + 2*TileHeight, 3*TileWidth, 1*TileHeight } }); + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.5f, 0.0f, 1.0f); + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = 
hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + render(); + return true; + } +}; + +void Example01(void) +{ + CExample01 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example02_Triangle.cpp b/graphics/deko3d/deko_examples/source/Example02_Triangle.cpp new file mode 100644 index 0000000..97eaa35 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example02_Triangle.cpp @@ -0,0 +1,231 @@ +/* +** deko3d Example 02: Triangle +** This example shows how to draw a basic multi-colored triangle. +** New concepts in this example: +** - Loading and using shaders +** - Setting up basic 3D engine state +** - Setting up vertex attributes and vertex buffers +** - Drawing primitives +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" + +// C++ standard library headers +#include +#include + +namespace +{ + struct Vertex + { + float position[3]; + float color[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, color), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + constexpr std::array TriangleVertexData = + { + Vertex{ { 0.0f, +1.0f, 0.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, 0.0f }, { 0.0f, 0.0f, 1.0f } }, + }; +} + +class CExample02 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr uint32_t FramebufferWidth = 1280; + static constexpr uint32_t FramebufferHeight = 720; + static constexpr unsigned StaticCmdSize = 0x10000; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional 
pool_data; + + dk::UniqueCmdBuf cmdbuf; + + CShader vertexShader; + CShader fragmentShader; + + CMemPool::Handle vertexBuffer; + + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist; + +public: + CExample02() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/basic_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/color_fsh.dksh"); + + // Load the vertex buffer + vertexBuffer = pool_data->allocate(sizeof(TriangleVertexData), alignof(Vertex)); + memcpy(vertexBuffer.getCpuAddr(), TriangleVertexData.data(), vertexBuffer.getSize()); + + // Create the framebuffer resources + createFramebufferResources(); + } + + ~CExample02() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | 
DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(FramebufferWidth, FramebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }; + cmdbuf.bindRenderTargets(&colorTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, FramebufferWidth, FramebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, FramebufferWidth, FramebufferHeight } }); + + // Clear the color buffer + 
cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + + // Bind state required for drawing the triangle + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + + // Draw the triangle + cmdbuf.draw(DkPrimitive_Triangles, TriangleVertexData.size(), 1, 0, 0); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + render(); + return true; + } +}; + +void Example02(void) +{ + CExample02 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example03_Cube.cpp b/graphics/deko3d/deko_examples/source/Example03_Cube.cpp new file mode 100644 index 0000000..18effe0 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example03_Cube.cpp @@ -0,0 +1,372 @@ +/* +** deko3d Example 03: Cube +** This example shows how to draw a basic rotating cube. 
+** New concepts in this example: +** - Setting up and using a depth buffer +** - Setting up uniform buffers +** - Basic 3D maths, including projection matrices +** - Updating uniforms with a dynamic command buffer +** - Adjusting resolution dynamically by recreating resources (720p handheld/1080p docked) +** - Depth buffer discard after a barrier +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" + +// C++ standard library headers +#include +#include + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include +#include +#include +#include + +namespace +{ + struct Vertex + { + float position[3]; + float color[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, color), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + constexpr std::array CubeVertexData = + { + // +X face + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -X face + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // +Y face + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, 
+1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -Y face + Vertex{ { -1.0f, -1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // +Z face + Vertex{ { -1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -Z face + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + }; + + struct Transformation + { + glm::mat4 mdlvMtx; + glm::mat4 projMtx; + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample03 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf dyncmd; + CCmdMemRing dynmem; + + CShader vertexShader; + CShader fragmentShader; + + Transformation transformState; + CMemPool::Handle transformUniformBuffer; + + CMemPool::Handle vertexBuffer; + + uint32_t framebufferWidth; + uint32_t framebufferHeight; + + CMemPool::Handle depthBuffer_mem; + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + + dk::Image depthBuffer; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist; + +public: + CExample03() + { + // Create the deko3d device + device = 
dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/transform_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/color_fsh.dksh"); + + // Create the transformation uniform buffer + transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT); + + // Load the vertex buffer + vertexBuffer = pool_data->allocate(sizeof(CubeVertexData), alignof(Vertex)); + memcpy(vertexBuffer.getCpuAddr(), CubeVertexData.data(), vertexBuffer.getSize()); + } + + ~CExample03() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + + // Destroy the uniform buffer (not strictly needed in this case) + transformUniformBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the depth buffer + dk::ImageLayout layout_depthbuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_Z24S8) + .setDimensions(framebufferWidth, 
framebufferHeight) + .initialize(layout_depthbuffer); + + // Create the depth buffer + depthBuffer_mem = pool_images->allocate(layout_depthbuffer.getSize(), layout_depthbuffer.getAlignment()); + depthBuffer.initialize(layout_depthbuffer, depthBuffer_mem.getMemBlock(), depthBuffer_mem.getOffset()); + + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }, depthTarget{ depthBuffer }; + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + + // Initialize the projection matrix + transformState.projMtx = glm::perspectiveRH_ZO( + glm::radians(40.0f), + float(framebufferWidth)/float(framebufferHeight), + 0.01f, 1000.0f); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear 
the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + + // Destroy the depth buffer + depthBuffer_mem.destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::DepthStencilState depthStencilState; + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, (float)framebufferWidth, (float)framebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, framebufferWidth, framebufferHeight } }); + + // Clear the color and depth buffers + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearDepthStencil(true, 1.0f, 0xFF, 0); + + // Bind state required for drawing the cube + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize()); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindDepthStencilState(depthStencilState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + + // Draw the cube + cmdbuf.draw(DkPrimitive_Quads, CubeVertexData.size(), 1, 0, 0); + + // Fragment barrier, to make sure we finish previous work before discarding the depth buffer + cmdbuf.barrier(DkBarrier_Fragments, 0); + + // Discard the depth buffer since we don't need it anymore + cmdbuf.discardDepthStencil(); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command 
list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the uniform buffer with the new transformation state (this data gets inlined in the command list) + dyncmd.pushConstants( + transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(), + 0, sizeof(transformState), &transformState); + + // Finish off the dynamic command list, and submit it to the queue + queue.submitCommands(dynmem.end(dyncmd)); + + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + void onOperationMode(AppletOperationMode mode) override + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Choose framebuffer size + chooseFramebufferSize(framebufferWidth, framebufferHeight, mode); + + // Recreate the framebuffers and its associated resources + createFramebufferResources(); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by implicit cast to single precision + float tau = glm::two_pi(); + + float period1 = fractf(time/8.0f); + float period2 = fractf(time/4.0f); + + // Generate the model-view matrix for this frame + // Keep in mind that GLM transformation functions multiply to the right, so essentially we have: + // mdlvMtx = Translate * RotateX * RotateY * Scale + // This means that the Scale operation is applied first, then RotateY, and so on. 
+ transformState.mdlvMtx = glm::mat4{1.0f}; + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, 0.0f, -3.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, sinf(period2 * tau) * tau / 8.0f, glm::vec3{1.0f, 0.0f, 0.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, -period1 * tau, glm::vec3{0.0f, 1.0f, 0.0f}); + transformState.mdlvMtx = glm::scale(transformState.mdlvMtx, glm::vec3{0.5f}); + + render(); + return true; + } +}; + +void Example03(void) +{ + CExample03 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example04_TexturedCube.cpp b/graphics/deko3d/deko_examples/source/Example04_TexturedCube.cpp new file mode 100644 index 0000000..c1a5962 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example04_TexturedCube.cpp @@ -0,0 +1,414 @@ +/* +** deko3d Example 04: Textured Cube +** This example shows how to render a textured cube. +** New concepts in this example: +** - Loading a texture image from the filesystem +** - Creating and using image descriptors +** - Creating and using samplers and sampler descriptors +** - Calculating combined image+sampler handles for use by shaders +** - Initializing persistent state in a queue +** +** The texture used in this example was borrowed from https://pixabay.com/photos/cat-animal-pet-cats-close-up-300572/ +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" +#include "SampleFramework/CDescriptorSet.h" +#include "SampleFramework/CExternalImage.h" + +// C++ standard library headers +#include +#include + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include +#include +#include +#include + +namespace +{ + struct 
Vertex + { + float position[3]; + float texcoord[2]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, texcoord), DkVtxAttribSize_2x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + constexpr std::array CubeVertexData = + { + // +X face + Vertex{ { +1.0f, +1.0f, +1.0f }, { 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 1.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f } }, + + // -X face + Vertex{ { -1.0f, +1.0f, -1.0f }, { 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 1.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f } }, + + // +Y face + Vertex{ { -1.0f, +1.0f, -1.0f }, { 0.0f, 0.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f } }, + + // -Y face + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 1.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 1.0f, 0.0f } }, + + // +Z face + Vertex{ { -1.0f, +1.0f, +1.0f }, { 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 1.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f } }, + + // -Z face + Vertex{ { +1.0f, +1.0f, -1.0f }, { 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 1.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f } }, + }; + + struct Transformation + { + glm::mat4 mdlvMtx; + glm::mat4 projMtx; + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample04 final : 
public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + static constexpr unsigned MaxImages = 1; + static constexpr unsigned MaxSamplers = 1; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf dyncmd; + CCmdMemRing dynmem; + + CDescriptorSet imageDescriptorSet; + CDescriptorSet samplerDescriptorSet; + + CShader vertexShader; + CShader fragmentShader; + + Transformation transformState; + CMemPool::Handle transformUniformBuffer; + + CMemPool::Handle vertexBuffer; + CExternalImage texImage; + + uint32_t framebufferWidth; + uint32_t framebufferHeight; + + CMemPool::Handle depthBuffer_mem; + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + + dk::Image depthBuffer; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist; + +public: + CExample04() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = 
dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Create the image and sampler descriptor sets + imageDescriptorSet.allocate(*pool_data); + samplerDescriptorSet.allocate(*pool_data); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/transform_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/texture_fsh.dksh"); + + // Create the transformation uniform buffer + transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT); + + // Load the vertex buffer + vertexBuffer = pool_data->allocate(sizeof(CubeVertexData), alignof(Vertex)); + memcpy(vertexBuffer.getCpuAddr(), CubeVertexData.data(), vertexBuffer.getSize()); + + // Load the image + texImage.load(*pool_images, *pool_data, device, queue, "romfs:/cat-256x256.bc1", 256, 256, DkImageFormat_RGB_BC1); + + // Configure persistent state in the queue + { + // Upload the image descriptor + imageDescriptorSet.update(cmdbuf, 0, texImage.getDescriptor()); + + // Configure a sampler + dk::Sampler sampler; + sampler.setFilter(DkFilter_Linear, DkFilter_Linear); + sampler.setWrapMode(DkWrapMode_ClampToEdge, DkWrapMode_ClampToEdge, DkWrapMode_ClampToEdge); + + // Upload the sampler descriptor + dk::SamplerDescriptor samplerDescriptor; + samplerDescriptor.initialize(sampler); + samplerDescriptorSet.update(cmdbuf, 0, samplerDescriptor); + + // Bind the image and sampler descriptor sets + imageDescriptorSet.bindForImages(cmdbuf); + samplerDescriptorSet.bindForSamplers(cmdbuf); + + // Submit the configuration commands to the queue + queue.submitCommands(cmdbuf.finishList()); + queue.waitIdle(); + cmdbuf.clear(); + } + } + + ~CExample04() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + + // Destroy the uniform buffer (not strictly needed in this case) + transformUniformBuffer.destroy(); + } + + void 
createFramebufferResources() + { + // Create layout for the depth buffer + dk::ImageLayout layout_depthbuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_Z24S8) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_depthbuffer); + + // Create the depth buffer + depthBuffer_mem = pool_images->allocate(layout_depthbuffer.getSize(), layout_depthbuffer.getAlignment()); + depthBuffer.initialize(layout_depthbuffer, depthBuffer_mem.getMemBlock(), depthBuffer_mem.getOffset()); + + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }, depthTarget { depthBuffer }; + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + + // Initialize the projection matrix + transformState.projMtx = glm::perspectiveRH_ZO( + glm::radians(40.0f), + 
float(framebufferWidth)/float(framebufferHeight), + 0.01f, 1000.0f); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + + // Destroy the depth buffer + depthBuffer_mem.destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::DepthStencilState depthStencilState; + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, (float)framebufferWidth, (float)framebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, framebufferWidth, framebufferHeight } }); + + // Clear the color and depth buffers + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearDepthStencil(true, 1.0f, 0xFF, 0); + + // Bind state required for drawing the cube + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize()); + cmdbuf.bindTextures(DkStage_Fragment, 0, dkMakeTextureHandle(0, 0)); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindDepthStencilState(depthStencilState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + + // Draw the cube + cmdbuf.draw(DkPrimitive_Quads, CubeVertexData.size(), 1, 0, 0); + + // Fragment 
barrier, to make sure we finish previous work before discarding the depth buffer + cmdbuf.barrier(DkBarrier_Fragments, 0); + + // Discard the depth buffer since we don't need it anymore + cmdbuf.discardDepthStencil(); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the uniform buffer with the new transformation state (this data gets inlined in the command list) + dyncmd.pushConstants( + transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(), + 0, sizeof(transformState), &transformState); + + // Finish off the dynamic command list (which also submits it to the queue) + queue.submitCommands(dynmem.end(dyncmd)); + + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + void onOperationMode(AppletOperationMode mode) override + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Choose framebuffer size + chooseFramebufferSize(framebufferWidth, framebufferHeight, mode); + + // Recreate the framebuffers and its associated resources + createFramebufferResources(); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by implicit cast to single precision + float tau = glm::two_pi(); + + float period1 = fractf(time/8.0f); + float period2 = fractf(time/4.0f); + + // Generate the model-view 
matrix for this frame + // Keep in mind that GLM transformation functions multiply to the right, so essentially we have: + // mdlvMtx = Translate * RotateX * RotateY * Scale + // This means that the Scale operation is applied first, then RotateY, and so on. + transformState.mdlvMtx = glm::mat4{1.0f}; + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, 0.0f, -3.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, sinf(period2 * tau) * tau / 8.0f, glm::vec3{1.0f, 0.0f, 0.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, -period1 * tau, glm::vec3{0.0f, 1.0f, 0.0f}); + transformState.mdlvMtx = glm::scale(transformState.mdlvMtx, glm::vec3{0.5f}); + + render(); + return true; + } +}; + +void Example04(void) +{ + CExample04 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example05_Tessellation.cpp b/graphics/deko3d/deko_examples/source/Example05_Tessellation.cpp new file mode 100644 index 0000000..a456436 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example05_Tessellation.cpp @@ -0,0 +1,253 @@ +/* +** deko3d Example 05: Simple Tessellation +** This example shows how to use tessellation. 
+** New concepts in this example: +** - Using tessellation control and evaluation shaders +** - Controlling tessellation parameters +** - Configuring and using line polygon mode +** - Configuring and using built-in edge smoothing +** - Configuring and using blending (needed for obeying alpha generated by edge smoothing) +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" + +// C++ standard library headers +#include +#include + +namespace +{ + struct Vertex + { + float position[3]; + float color[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, color), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + constexpr std::array TriangleVertexData = + { + Vertex{ { 0.0f, +1.0f, 0.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, 0.0f }, { 0.0f, 0.0f, 1.0f } }, + }; +} + +class CExample05 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr uint32_t FramebufferWidth = 1280; + static constexpr uint32_t FramebufferHeight = 720; + static constexpr unsigned StaticCmdSize = 0x10000; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + + CShader vertexShader; + CShader tessCtrlShader; + CShader tessEvalShader; + CShader fragmentShader; + + CMemPool::Handle vertexBuffer; + + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist; + +public: + 
CExample05() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/basic_vsh.dksh"); + tessCtrlShader.load(*pool_code, "romfs:/shaders/tess_simple_tcsh.dksh"); + tessEvalShader.load(*pool_code, "romfs:/shaders/tess_simple_tesh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/color_fsh.dksh"); + + // Load the vertex buffer + vertexBuffer = pool_data->allocate(sizeof(TriangleVertexData), alignof(Vertex)); + memcpy(vertexBuffer.getCpuAddr(), TriangleVertexData.data(), vertexBuffer.getSize()); + + // Create the framebuffer resources + createFramebufferResources(); + } + + ~CExample05() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(FramebufferWidth, FramebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array 
fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }; + cmdbuf.bindRenderTargets(&colorTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::BlendState blendState; + + // Configure rasterizer state: draw polygons as lines, and enable polygon smoothing + rasterizerState.setPolygonMode(DkPolygonMode_Line); + rasterizerState.setPolygonSmoothEnable(true); + + // Configure color state: enable blending (needed for polygon smoothing since it generates alpha values) + colorState.setBlendEnable(0, true); + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, 
FramebufferWidth, FramebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, FramebufferWidth, FramebufferHeight } }); + + // Clear the color buffer + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + + // Bind state required for drawing the triangle + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, tessCtrlShader, tessEvalShader, fragmentShader }); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindBlendStates(0, blendState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + cmdbuf.setLineWidth(4.0f); + cmdbuf.setPatchSize(3); + + // Note that the tessellation control shader is optional. If no such shader is bound, + // the following commands can be used to control tessellation: + // (try it out! remove the "tessCtrlShader" from the bindShaders call and uncomment these) + //cmdbuf.setTessInnerLevels(5.0f); + //cmdbuf.setTessOuterLevels(7.0f, 3.0f, 5.0f); + + // Draw the triangle + cmdbuf.draw(DkPrimitive_Patches, TriangleVertexData.size(), 1, 0, 0); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + render(); + return true; + } +}; + +void Example05(void) +{ + CExample05 app; + 
app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example06_Multisampling.cpp b/graphics/deko3d/deko_examples/source/Example06_Multisampling.cpp new file mode 100644 index 0000000..639dddd --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example06_Multisampling.cpp @@ -0,0 +1,408 @@ +/* +** deko3d Example 06: Simple Multisampling +** This example shows how to use a multisampled render target, which is then resolved into the final framebuffer. +** New concepts in this example: +** - Creating multisampled render targets +** - Rendering to non-swapchain render targets +** - Configuring multisample state +** - Performing a resolve step +** - Discarding color/depth buffers that are not used for presentation +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" + +// C++ standard library headers +#include +#include + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include +#include +#include +#include + +namespace +{ + struct Vertex + { + float position[3]; + float color[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, color), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + constexpr std::array CubeVertexData = + { + // +X face + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -X face + 
Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // +Y face + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, +1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, +1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -Y face + Vertex{ { -1.0f, -1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // +Z face + Vertex{ { -1.0f, +1.0f, +1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, +1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, +1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { +1.0f, +1.0f, +1.0f }, { 1.0f, 1.0f, 0.0f } }, + + // -Z face + Vertex{ { +1.0f, +1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f } }, + Vertex{ { +1.0f, -1.0f, -1.0f }, { 0.0f, 1.0f, 0.0f } }, + Vertex{ { -1.0f, -1.0f, -1.0f }, { 0.0f, 0.0f, 1.0f } }, + Vertex{ { -1.0f, +1.0f, -1.0f }, { 1.0f, 1.0f, 0.0f } }, + }; + + struct Transformation + { + glm::mat4 mdlvMtx; + glm::mat4 projMtx; + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample06 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + static constexpr DkMsMode MultisampleMode = DkMsMode_4x; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf dyncmd; + CCmdMemRing dynmem; + + CShader vertexShader; + CShader fragmentShader; + + Transformation transformState; + CMemPool::Handle 
transformUniformBuffer; + + CMemPool::Handle vertexBuffer; + + uint32_t framebufferWidth; + uint32_t framebufferHeight; + + CMemPool::Handle colorBuffer_mem; + CMemPool::Handle depthBuffer_mem; + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + + dk::Image colorBuffer; + dk::Image depthBuffer; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist, discard_cmdlist; + +public: + CExample06() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 64*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/transform_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/color_fsh.dksh"); + + // Create the transformation uniform buffer + transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT); + + // Load the vertex buffer + vertexBuffer = pool_data->allocate(sizeof(CubeVertexData), alignof(Vertex)); + memcpy(vertexBuffer.getCpuAddr(), CubeVertexData.data(), vertexBuffer.getSize()); + } + + ~CExample06() + { + // Destroy the 
framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + + // Destroy the uniform buffer (not strictly needed in this case) + transformUniformBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the (multisampled) color buffer + dk::ImageLayout layout_colorbuffer; + dk::ImageLayoutMaker{device} + .setType(DkImageType_2DMS) + .setFlags(DkImageFlags_UsageRender | DkImageFlags_Usage2DEngine | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setMsMode(MultisampleMode) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_colorbuffer); + + // Create layout for the (also multisampled) depth buffer + dk::ImageLayout layout_depthbuffer; + dk::ImageLayoutMaker{device} + .setType(DkImageType_2DMS) + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_Z24S8) + .setMsMode(MultisampleMode) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_depthbuffer); + + // Create the color buffer + colorBuffer_mem = pool_images->allocate(layout_colorbuffer.getSize(), layout_colorbuffer.getAlignment()); + colorBuffer.initialize(layout_colorbuffer, colorBuffer_mem.getMemBlock(), colorBuffer_mem.getOffset()); + + // Create the depth buffer + depthBuffer_mem = pool_images->allocate(layout_depthbuffer.getSize(), layout_depthbuffer.getAlignment()); + depthBuffer.initialize(layout_depthbuffer, depthBuffer_mem.getMemBlock(), depthBuffer_mem.getOffset()); + + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_Usage2DEngine | DkImageFlags_UsagePresent) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + 
uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that resolves the color buffer into the framebuffer + dk::ImageView colorView { colorBuffer }, framebufferView { framebuffers[i] }; + cmdbuf.resolveImage(colorView, framebufferView); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main command lists + recordStaticCommands(); + + // Initialize the projection matrix + transformState.projMtx = glm::perspectiveRH_ZO( + glm::radians(40.0f), + float(framebufferWidth)/float(framebufferHeight), + 0.01f, 1000.0f); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + + // Destroy the depth buffer + depthBuffer_mem.destroy(); + + // Destroy the color buffer + colorBuffer_mem.destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::MultisampleState multisampleState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::DepthStencilState depthStencilState; + + // Configure multisample state + 
multisampleState.setMode(MultisampleMode); + multisampleState.setLocations(); + + // Bind color buffer and depth buffer + dk::ImageView colorTarget { colorBuffer }, depthTarget { depthBuffer }; + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, (float)framebufferWidth, (float)framebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, framebufferWidth, framebufferHeight } }); + + // Clear the color and depth buffers + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearDepthStencil(true, 1.0f, 0xFF, 0); + + // Bind state required for drawing the cube + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize()); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindMultisampleState(multisampleState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindDepthStencilState(depthStencilState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + + // Draw the cube + cmdbuf.draw(DkPrimitive_Quads, CubeVertexData.size(), 1, 0, 0); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + + // Discard the color and depth buffers since we don't need them anymore + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + cmdbuf.discardColor(0); + cmdbuf.discardDepthStencil(); + + // Finish off this command list + discard_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the uniform buffer with the new transformation state (this data gets inlined in the command list) + 
dyncmd.pushConstants( + transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(), + 0, sizeof(transformState), &transformState); + + // Finish off the dynamic command list (which also submits it to the queue) + queue.submitCommands(dynmem.end(dyncmd)); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Acquire a framebuffer from the swapchain + int slot = queue.acquireImage(swapchain); + + // Submit the command list that resolves the color buffer to the framebuffer + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Submit the command list used for discarding the color and depth buffers + queue.submitCommands(discard_cmdlist); + + // Now that we are done rendering, present it to the screen (this also flushes the queue) + queue.presentImage(swapchain, slot); + } + + void onOperationMode(AppletOperationMode mode) override + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Choose framebuffer size + chooseFramebufferSize(framebufferWidth, framebufferHeight, mode); + + // Recreate the framebuffers and its associated resources + createFramebufferResources(); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by implicit cast to single precision + float tau = glm::two_pi<float>(); + + float period1 = fractf(time/8.0f); + float period2 = fractf(time/4.0f); + + // Generate the model-view matrix for this frame + // Keep in mind that GLM transformation functions multiply to the right, so essentially we have: + // mdlvMtx = Translate * RotateX * RotateY * Scale + // This means that the Scale operation is applied first, then RotateY, and so on.
+ transformState.mdlvMtx = glm::mat4{1.0f}; + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, 0.0f, -3.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, sinf(period2 * tau) * tau / 8.0f, glm::vec3{1.0f, 0.0f, 0.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, -period1 * tau, glm::vec3{0.0f, 1.0f, 0.0f}); + transformState.mdlvMtx = glm::scale(transformState.mdlvMtx, glm::vec3{0.5f}); + + render(); + return true; + } +}; + +void Example06(void) +{ + CExample06 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example07_MeshLighting.cpp b/graphics/deko3d/deko_examples/source/Example07_MeshLighting.cpp new file mode 100644 index 0000000..9c3089d --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example07_MeshLighting.cpp @@ -0,0 +1,401 @@ +/* +** deko3d Example 07: Mesh Loading and Lighting (sRGB) +** This example shows how to load a mesh, and render it using per-fragment lighting. +** New concepts in this example: +** - Loading geometry data (mesh) from the filesystem +** - Configuring and using index buffers +** - Using sRGB framebuffers +** - Using multiple uniform buffers on different stages +** - Basic Blinn-Phong lighting with Reinhard tone mapping +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" +#include "SampleFramework/FileLoader.h" + +// C++ standard library headers +#include <array> +#include <optional> + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include <glm/vec3.hpp> +#include <glm/vec4.hpp> +#include <glm/mat4x4.hpp> +#include <glm/gtc/matrix_transform.hpp> + +namespace +{ + struct Vertex + { + float position[3]; + float normal[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex,
position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, normal), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + struct Transformation + { + glm::mat4 mdlvMtx; + glm::mat4 projMtx; + }; + + struct Lighting + { + glm::vec4 lightPos; // if w=0 this is lightDir + glm::vec3 ambient; + glm::vec3 diffuse; + glm::vec4 specular; // w is shininess + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample07 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + static constexpr DkMsMode MultisampleMode = DkMsMode_4x; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional<CMemPool> pool_images; + std::optional<CMemPool> pool_code; + std::optional<CMemPool> pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf dyncmd; + CCmdMemRing dynmem; + + CShader vertexShader; + CShader fragmentShader; + + Transformation transformState; + CMemPool::Handle transformUniformBuffer; + + Lighting lightingState; + CMemPool::Handle lightingUniformBuffer; + + CMemPool::Handle vertexBuffer; + CMemPool::Handle indexBuffer; + + uint32_t framebufferWidth; + uint32_t framebufferHeight; + + CMemPool::Handle colorBuffer_mem; + CMemPool::Handle depthBuffer_mem; + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + + dk::Image colorBuffer; + dk::Image depthBuffer; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist, discard_cmdlist; + +public: + CExample07() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device,
DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 64*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/transform_normal_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/basic_lighting_fsh.dksh"); + + // Create the transformation uniform buffer + transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT); + + // Create the lighting uniform buffer + lightingUniformBuffer = pool_data->allocate(sizeof(lightingState), DK_UNIFORM_BUF_ALIGNMENT); + + // Initialize the lighting state + lightingState.lightPos = glm::vec4{0.0f, 4.0f, 1.0f, 1.0f}; + lightingState.ambient = glm::vec3{0.046227f,0.028832f,0.003302f}; + lightingState.diffuse = glm::vec3{0.564963f,0.367818f,0.051293f}; + lightingState.specular = glm::vec4{24.0f*glm::vec3{0.394737f,0.308916f,0.134004f}, 64.0f}; + + // Load the teapot mesh + vertexBuffer = LoadFile(*pool_data, "romfs:/teapot-vtx.bin", alignof(Vertex)); + indexBuffer = LoadFile(*pool_data, "romfs:/teapot-idx.bin", alignof(u16)); + } + + ~CExample07() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the index buffer (not strictly needed in this case) + indexBuffer.destroy(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + + // Destroy the uniform 
buffers (not strictly needed in this case) + lightingUniformBuffer.destroy(); + transformUniformBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the (multisampled) color buffer + dk::ImageLayout layout_colorbuffer; + dk::ImageLayoutMaker{device} + .setType(DkImageType_2DMS) + .setFlags(DkImageFlags_UsageRender | DkImageFlags_Usage2DEngine | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm_sRGB) + .setMsMode(MultisampleMode) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_colorbuffer); + + // Create layout for the (also multisampled) depth buffer + dk::ImageLayout layout_depthbuffer; + dk::ImageLayoutMaker{device} + .setType(DkImageType_2DMS) + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_Z24S8) + .setMsMode(MultisampleMode) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_depthbuffer); + + // Create the color buffer + colorBuffer_mem = pool_images->allocate(layout_colorbuffer.getSize(), layout_colorbuffer.getAlignment()); + colorBuffer.initialize(layout_colorbuffer, colorBuffer_mem.getMemBlock(), colorBuffer_mem.getOffset()); + + // Create the depth buffer + depthBuffer_mem = pool_images->allocate(layout_depthbuffer.getSize(), layout_depthbuffer.getAlignment()); + depthBuffer.initialize(layout_depthbuffer, depthBuffer_mem.getMemBlock(), depthBuffer_mem.getOffset()); + + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_Usage2DEngine | DkImageFlags_UsagePresent) + .setFormat(DkImageFormat_RGBA8_Unorm_sRGB) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array<DkImage const*, NumFramebuffers> fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] =
pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that resolves the color buffer into the framebuffer + dk::ImageView colorView { colorBuffer }, framebufferView { framebuffers[i] }; + cmdbuf.resolveImage(colorView, framebufferView); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main command lists + recordStaticCommands(); + + // Initialize the projection matrix + transformState.projMtx = glm::perspectiveRH_ZO( + glm::radians(40.0f), + float(framebufferWidth)/float(framebufferHeight), + 0.01f, 1000.0f); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + + // Destroy the depth buffer + depthBuffer_mem.destroy(); + + // Destroy the color buffer + colorBuffer_mem.destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::MultisampleState multisampleState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::DepthStencilState depthStencilState; + + // Configure multisample state + multisampleState.setMode(MultisampleMode); + multisampleState.setLocations(); + + // Bind color buffer and depth buffer + dk::ImageView colorTarget { colorBuffer }, depthTarget { 
depthBuffer }; + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, (float)framebufferWidth, (float)framebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, framebufferWidth, framebufferHeight } }); + + // Clear the color and depth buffers + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearDepthStencil(true, 1.0f, 0xFF, 0); + + // Bind state required for drawing the mesh + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize()); + cmdbuf.bindUniformBuffer(DkStage_Fragment, 0, lightingUniformBuffer.getGpuAddr(), lightingUniformBuffer.getSize()); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindMultisampleState(multisampleState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindDepthStencilState(depthStencilState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + cmdbuf.bindIdxBuffer(DkIdxFormat_Uint16, indexBuffer.getGpuAddr()); + + // Draw the mesh + cmdbuf.drawIndexed(DkPrimitive_Triangles, indexBuffer.getSize() / sizeof(u16), 1, 0, 0, 0); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + + // Discard the color and depth buffers since we don't need them anymore + cmdbuf.bindRenderTargets(&colorTarget, &depthTarget); + cmdbuf.discardColor(0); + cmdbuf.discardDepthStencil(); + + // Finish off this command list + discard_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the transformation uniform buffer with the new state (this data gets 
inlined in the command list) + dyncmd.pushConstants( + transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(), + 0, sizeof(transformState), &transformState); + + // Update the lighting uniform buffer with the new state + dyncmd.pushConstants( + lightingUniformBuffer.getGpuAddr(), lightingUniformBuffer.getSize(), + 0, sizeof(lightingState), &lightingState); + + // Finish off the dynamic command list (which also submits it to the queue) + queue.submitCommands(dynmem.end(dyncmd)); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Acquire a framebuffer from the swapchain + int slot = queue.acquireImage(swapchain); + + // Submit the command list that resolves the color buffer to the framebuffer + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Submit the command list used for discarding the color and depth buffers + queue.submitCommands(discard_cmdlist); + + // Now that we are done rendering, present it to the screen (this also flushes the queue) + queue.presentImage(swapchain, slot); + } + + void onOperationMode(AppletOperationMode mode) override + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Choose framebuffer size + chooseFramebufferSize(framebufferWidth, framebufferHeight, mode); + + // Recreate the framebuffers and its associated resources + createFramebufferResources(); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by implicit cast to single precision + float tau = glm::two_pi<float>(); + + float period1 = fractf(time/8.0f); + float period2 = fractf(time/4.0f); + + // Generate the model-view matrix for this frame + // Keep in mind that GLM transformation functions multiply to the right, so essentially we have: + // mdlvMtx = Translate1 * RotateX * RotateY * Translate2 + // This means that the
Translate2 operation is applied first, then RotateY, and so on. + transformState.mdlvMtx = glm::mat4{1.0f}; + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, 0.0f, -3.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, sinf(period2 * tau) * tau / 8.0f, glm::vec3{1.0f, 0.0f, 0.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, -period1 * tau, glm::vec3{0.0f, 1.0f, 0.0f}); + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, -0.5f, 0.0f}); + + render(); + return true; + } +}; + +void Example07(void) +{ + CExample07 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example08_DeferredShading.cpp b/graphics/deko3d/deko_examples/source/Example08_DeferredShading.cpp new file mode 100644 index 0000000..7a24662 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example08_DeferredShading.cpp @@ -0,0 +1,482 @@ +/* +** deko3d Example 08: Deferred Shading (Multipass Rendering with Tiled Cache) +** This example shows how to perform deferred shading, a multipass rendering technique that goes well with the tiled cache. 
+** New concepts in this example: +** - Rendering to multiple render targets (MRT) at once +** - Floating point render targets +** - Enabling and configuring the tiled cache +** - Using the tiled barrier for relaxing ordering to the tiles generated by the binner (as opposed to a full fragment barrier) +** - Custom composition step reading the output of previous rendering passes as textures +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" +#include "SampleFramework/CDescriptorSet.h" +#include "SampleFramework/FileLoader.h" + +// C++ standard library headers +#include <array> +#include <optional> + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include <glm/vec3.hpp> +#include <glm/vec4.hpp> +#include <glm/mat4x4.hpp> +#include <glm/gtc/matrix_transform.hpp> + +namespace +{ + struct Vertex + { + float position[3]; + float normal[3]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, normal), DkVtxAttribSize_3x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + struct Transformation + { + glm::mat4 mdlvMtx; + glm::mat4 projMtx; + }; + + struct Lighting + { + glm::vec4 lightPos; // if w=0 this is lightDir + glm::vec3 ambient; + glm::vec3 diffuse; + glm::vec4 specular; // w is shininess + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample08 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + static constexpr unsigned MaxImages = 3; + static
constexpr unsigned MaxSamplers = 1; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional<CMemPool> pool_images; + std::optional<CMemPool> pool_code; + std::optional<CMemPool> pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf dyncmd; + CCmdMemRing dynmem; + + CDescriptorSet imageDescriptorSet; + CDescriptorSet samplerDescriptorSet; + + CShader vertexShader; + CShader fragmentShader; + + CShader compositionVertexShader; + CShader compositionFragmentShader; + + Transformation transformState; + CMemPool::Handle transformUniformBuffer; + + Lighting lightingState; + CMemPool::Handle lightingUniformBuffer; + + CMemPool::Handle vertexBuffer; + CMemPool::Handle indexBuffer; + + uint32_t framebufferWidth; + uint32_t framebufferHeight; + + CMemPool::Handle albedoBuffer_mem; + CMemPool::Handle normalBuffer_mem; + CMemPool::Handle viewDirBuffer_mem; + CMemPool::Handle depthBuffer_mem; + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + + dk::Image albedoBuffer; + dk::Image normalBuffer; + dk::Image viewDirBuffer; + dk::Image depthBuffer; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList render_cmdlist, composition_cmdlist; + +public: + CExample08() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 64*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 1*1024*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(),
cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Create the image and sampler descriptor sets + imageDescriptorSet.allocate(*pool_data); + samplerDescriptorSet.allocate(*pool_data); + + // Load the shaders + vertexShader.load(*pool_code, "romfs:/shaders/transform_normal_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/basic_deferred_fsh.dksh"); + compositionVertexShader.load(*pool_code, "romfs:/shaders/composition_vsh.dksh"); + compositionFragmentShader.load(*pool_code, "romfs:/shaders/composition_fsh.dksh"); + + // Create the transformation uniform buffer + transformUniformBuffer = pool_data->allocate(sizeof(transformState), DK_UNIFORM_BUF_ALIGNMENT); + + // Create the lighting uniform buffer + lightingUniformBuffer = pool_data->allocate(sizeof(lightingState), DK_UNIFORM_BUF_ALIGNMENT); + + // Initialize the lighting state + lightingState.lightPos = glm::vec4{0.0f, 4.0f, 1.0f, 1.0f}; + lightingState.ambient = glm::vec3{0.046227f,0.028832f,0.003302f}; + lightingState.diffuse = glm::vec3{0.564963f,0.367818f,0.051293f}; + lightingState.specular = glm::vec4{24.0f*glm::vec3{0.394737f,0.308916f,0.134004f}, 64.0f}; + + // Load the teapot mesh + vertexBuffer = LoadFile(*pool_data, "romfs:/teapot-vtx.bin", alignof(Vertex)); + indexBuffer = LoadFile(*pool_data, "romfs:/teapot-idx.bin", alignof(u16)); + + // Configure persistent state in the queue + { + // Bind the image and sampler descriptor sets + imageDescriptorSet.bindForImages(cmdbuf); + samplerDescriptorSet.bindForSamplers(cmdbuf); + + // Enable the tiled cache + cmdbuf.setTileSize(64, 64); // example size, please experiment with this + cmdbuf.tiledCacheOp(DkTiledCacheOp_Enable); + + // Submit the configuration commands to the queue + queue.submitCommands(cmdbuf.finishList()); + queue.waitIdle(); + cmdbuf.clear(); + } + } + + void 
createFramebufferResources() + { + // Calculate layout for the different buffers part of the g-buffer + dk::ImageLayout layout_gbuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA16_Float) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_gbuffer); + + // Calculate layout for the depth buffer + dk::ImageLayout layout_depthbuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_Z24S8) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_depthbuffer); + + // Calculate layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent) + .setFormat(DkImageFormat_RGBA8_Unorm_sRGB) + .setDimensions(framebufferWidth, framebufferHeight) + .initialize(layout_framebuffer); + + // Create the albedo buffer + albedoBuffer_mem = pool_images->allocate(layout_gbuffer.getSize(), layout_gbuffer.getAlignment()); + albedoBuffer.initialize(layout_gbuffer, albedoBuffer_mem.getMemBlock(), albedoBuffer_mem.getOffset()); + + // Create the normal buffer + normalBuffer_mem = pool_images->allocate(layout_gbuffer.getSize(), layout_gbuffer.getAlignment()); + normalBuffer.initialize(layout_gbuffer, normalBuffer_mem.getMemBlock(), normalBuffer_mem.getOffset()); + + // Create the view direction buffer + viewDirBuffer_mem = pool_images->allocate(layout_gbuffer.getSize(), layout_gbuffer.getAlignment()); + viewDirBuffer.initialize(layout_gbuffer, viewDirBuffer_mem.getMemBlock(), viewDirBuffer_mem.getOffset()); + + // Create the depth buffer + depthBuffer_mem = pool_images->allocate(layout_depthbuffer.getSize(), layout_depthbuffer.getAlignment()); + depthBuffer.initialize(layout_depthbuffer, depthBuffer_mem.getMemBlock(), depthBuffer_mem.getOffset()); + + // Create the framebuffers + uint64_t fb_size 
= layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + DkImage const* fb_array[NumFramebuffers]; + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds the framebuffer + dk::ImageView framebufferView { framebuffers[i] }; + cmdbuf.bindRenderTargets(&framebufferView); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array, NumFramebuffers}.create(); + + // Generate the static command lists + recordStaticCommands(); + + // Initialize the projection matrix + transformState.projMtx = glm::perspectiveRH_ZO( + glm::radians(40.0f), + float(framebufferWidth)/float(framebufferHeight), + 0.01f, 1000.0f); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + + // Destroy the render targets + depthBuffer_mem.destroy(); + viewDirBuffer_mem.destroy(); + normalBuffer_mem.destroy(); + albedoBuffer_mem.destroy(); + } + + ~CExample08() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the index buffer (not strictly needed in this case) + indexBuffer.destroy(); + + // Destroy the vertex buffer (not strictly needed in this case) +
vertexBuffer.destroy(); + + // Destroy the uniform buffers (not strictly needed in this case) + lightingUniformBuffer.destroy(); + transformUniformBuffer.destroy(); + } + + void recordStaticCommands() + { + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::DepthStencilState depthStencilState; + + // Bind g-buffer and depth buffer + dk::ImageView albedoTarget { albedoBuffer }, normalTarget { normalBuffer }, viewDirTarget { viewDirBuffer }, depthTarget { depthBuffer }; + cmdbuf.bindRenderTargets({ &albedoTarget, &normalTarget, &viewDirTarget }, &depthTarget); + + // Configure viewport and scissor + const DkViewport viewport = { 0.0f, 0.0f, float(framebufferWidth), float(framebufferHeight), 0.0f, 1.0f }; + const DkScissor scissor = { 0, 0, framebufferWidth, framebufferHeight }; + cmdbuf.setViewports(0, { viewport, viewport, viewport }); + cmdbuf.setScissors(0, { scissor, scissor, scissor }); + + // Clear the g-buffer and the depth buffer + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearColor(1, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearColor(2, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + cmdbuf.clearDepthStencil(true, 1.0f, 0xFF, 0); + + // Bind state required for drawing the mesh + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Vertex, 0, transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize()); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindDepthStencilState(depthStencilState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + cmdbuf.bindVtxBufferState(VertexBufferState); + cmdbuf.bindIdxBuffer(DkIdxFormat_Uint16, indexBuffer.getGpuAddr()); + + // Draw the 
mesh + cmdbuf.drawIndexed(DkPrimitive_Triangles, indexBuffer.getSize() / sizeof(u16), 1, 0, 0, 0); + + // Tiled barrier (similar to using Vulkan's vkCmdNextSubpass) + image cache + // flush so that the next rendering step can access the output from this step + cmdbuf.barrier(DkBarrier_Tiles, DkInvalidateFlags_Image); + + // Discard the depth buffer since we don't need it anymore + cmdbuf.discardDepthStencil(); + + // End of the main rendering command list + render_cmdlist = cmdbuf.finishList(); + + // Upload image descriptors + std::array descriptors; + descriptors[0].initialize(albedoTarget); + descriptors[1].initialize(normalTarget); + descriptors[2].initialize(viewDirTarget); + imageDescriptorSet.update(cmdbuf, 0, descriptors); + + // Upload sampler descriptor + dk::Sampler sampler; + dk::SamplerDescriptor samplerDescriptor; + samplerDescriptor.initialize(sampler); + samplerDescriptorSet.update(cmdbuf, 0, samplerDescriptor); + + // Flush the descriptor cache + cmdbuf.barrier(DkBarrier_None, DkInvalidateFlags_Descriptors); + + // Bind state required for doing the composition + cmdbuf.setViewports(0, viewport); + cmdbuf.setScissors(0, scissor); + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { compositionVertexShader, compositionFragmentShader }); + cmdbuf.bindUniformBuffer(DkStage_Fragment, 0, lightingUniformBuffer.getGpuAddr(), lightingUniformBuffer.getSize()); + cmdbuf.bindTextures(DkStage_Fragment, 0, { + dkMakeTextureHandle(0, 0), + dkMakeTextureHandle(1, 0), + dkMakeTextureHandle(2, 0), + }); + cmdbuf.bindRasterizerState(dk::RasterizerState{}); + cmdbuf.bindColorState(dk::ColorState{}); + cmdbuf.bindColorWriteState(dk::ColorWriteState{}); + cmdbuf.bindVtxAttribState({}); + + // Draw the full screen quad + cmdbuf.draw(DkPrimitive_Quads, 4, 1, 0, 0); + + // Tiled barrier + cmdbuf.barrier(DkBarrier_Tiles, 0); + + // Discard the g-buffer since we don't need it anymore + cmdbuf.bindRenderTargets({ &albedoTarget, &normalTarget, &viewDirTarget }); + 
cmdbuf.discardColor(0); + cmdbuf.discardColor(1); + cmdbuf.discardColor(2); + + // End of the composition cmdlist + composition_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the transformation uniform buffer with the new state (this data gets inlined in the command list) + dyncmd.pushConstants( + transformUniformBuffer.getGpuAddr(), transformUniformBuffer.getSize(), + 0, sizeof(transformState), &transformState); + + // Update the lighting uniform buffer with the new state + dyncmd.pushConstants( + lightingUniformBuffer.getGpuAddr(), lightingUniformBuffer.getSize(), + 0, sizeof(lightingState), &lightingState); + + // Finish off the dynamic command list (which also submits it to the queue) + queue.submitCommands(dynmem.end(dyncmd)); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Acquire a framebuffer from the swapchain + int slot = queue.acquireImage(swapchain); + + // Submit the command list that binds the correct framebuffer + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Submit the command list used for performing the composition + queue.submitCommands(composition_cmdlist); + + // Now that we are done rendering, present it to the screen (this also flushes the queue) + queue.presentImage(swapchain, slot); + } + + void onOperationMode(AppletOperationMode mode) override + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Choose framebuffer size + chooseFramebufferSize(framebufferWidth, framebufferHeight, mode); + + // Recreate the framebuffers and its associated resources + createFramebufferResources(); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by 
implicit cast to single precision + float tau = glm::two_pi(); + + float period1 = fractf(time/8.0f); + float period2 = fractf(time/4.0f); + + // Generate the model-view matrix for this frame + // Keep in mind that GLM transformation functions multiply to the right, so essentially we have: + // mdlvMtx = Translate * RotateX * RotateY * Translate + // This means that the Scale operation is applied first, then RotateY, and so on. + transformState.mdlvMtx = glm::mat4{1.0f}; + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{sinf(period1*tau), 0.0f, -3.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, sinf(period2 * tau) * tau / 8.0f, glm::vec3{1.0f, 0.0f, 0.0f}); + transformState.mdlvMtx = glm::rotate(transformState.mdlvMtx, -period1 * tau, glm::vec3{0.0f, 1.0f, 0.0f}); + transformState.mdlvMtx = glm::translate(transformState.mdlvMtx, glm::vec3{0.0f, -0.5f, 0.0f}); + + render(); + return true; + } +}; + +void Example08(void) +{ + CExample08 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/Example09_SimpleCompute.cpp b/graphics/deko3d/deko_examples/source/Example09_SimpleCompute.cpp new file mode 100644 index 0000000..324ca8a --- /dev/null +++ b/graphics/deko3d/deko_examples/source/Example09_SimpleCompute.cpp @@ -0,0 +1,318 @@ +/* +** deko3d Example 09: Simple Compute Shader (Geometry Generation) +** This example shows how to use a compute shader to dynamically generate geometry. 
+** New concepts in this example: +** - Enabling compute support in a queue +** - Configuring and using compute shaders +** - Setting up shader storage buffers (SSBOs) +** - Dispatching compute jobs +** - Using a primitive barrier to ensure ordering of items +** - Drawing geometry generated dynamically by the GPU itself +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" +#include "SampleFramework/CMemPool.h" +#include "SampleFramework/CShader.h" +#include "SampleFramework/CCmdMemRing.h" + +// C++ standard library headers +#include +#include + +// GLM headers +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES // Enforces GLSL std140/std430 alignment rules for glm types +#define GLM_FORCE_INTRINSICS // Enables usage of SIMD CPU instructions (requiring the above as well) +#include +#include + +namespace +{ + struct Vertex + { + float position[4]; + float color[4]; + }; + + constexpr std::array VertexAttribState = + { + DkVtxAttribState{ 0, 0, offsetof(Vertex, position), DkVtxAttribSize_4x32, DkVtxAttribType_Float, 0 }, + DkVtxAttribState{ 0, 0, offsetof(Vertex, color), DkVtxAttribSize_4x32, DkVtxAttribType_Float, 0 }, + }; + + constexpr std::array VertexBufferState = + { + DkVtxBufferState{ sizeof(Vertex), 0 }, + }; + + struct GeneratorParams + { + glm::vec4 colorA; + glm::vec4 colorB; + float offset; + float scale; + float padding[2]; + }; + + inline float fractf(float x) + { + return x - floorf(x); + } +} + +class CExample09 final : public CApplication +{ + static constexpr unsigned NumFramebuffers = 2; + static constexpr uint32_t FramebufferWidth = 1280; + static constexpr uint32_t FramebufferHeight = 720; + static constexpr unsigned StaticCmdSize = 0x10000; + static constexpr unsigned DynamicCmdSize = 0x10000; + static constexpr unsigned NumVertices = 256; + + dk::UniqueDevice device; + dk::UniqueQueue queue; + + std::optional pool_images; + std::optional pool_code; + std::optional pool_data; + + dk::UniqueCmdBuf cmdbuf; + dk::UniqueCmdBuf 
dyncmd; + CCmdMemRing dynmem; + + GeneratorParams params; + CMemPool::Handle paramsUniformBuffer; + + CShader computeShader; + CShader vertexShader; + CShader fragmentShader; + + CMemPool::Handle vertexBuffer; + + CMemPool::Handle framebuffers_mem[NumFramebuffers]; + dk::Image framebuffers[NumFramebuffers]; + DkCmdList framebuffer_cmdlists[NumFramebuffers]; + dk::UniqueSwapchain swapchain; + + DkCmdList compute_cmdlist, render_cmdlist; + +public: + CExample09() + { + // Create the deko3d device + device = dk::DeviceMaker{}.create(); + + // Create the main queue + queue = dk::QueueMaker{device}.setFlags(DkQueueFlags_Graphics | DkQueueFlags_Compute).create(); + + // Create the memory pools + pool_images.emplace(device, DkMemBlockFlags_GpuCached | DkMemBlockFlags_Image, 16*1024*1024); + pool_code.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached | DkMemBlockFlags_Code, 128*1024); + pool_data.emplace(device, DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, 1*1024*1024); + + // Create the static command buffer and feed it freshly allocated memory + cmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle cmdmem = pool_data->allocate(StaticCmdSize); + cmdbuf.addMemory(cmdmem.getMemBlock(), cmdmem.getOffset(), cmdmem.getSize()); + + // Create the dynamic command buffer and allocate memory for it + dyncmd = dk::CmdBufMaker{device}.create(); + dynmem.allocate(*pool_data, DynamicCmdSize); + + // Load the shaders + computeShader.load(*pool_code, "romfs:/shaders/sinewave.dksh"); + vertexShader.load(*pool_code, "romfs:/shaders/basic_vsh.dksh"); + fragmentShader.load(*pool_code, "romfs:/shaders/color_fsh.dksh"); + + // Create the uniform buffer + paramsUniformBuffer = pool_data->allocate(sizeof(params), DK_UNIFORM_BUF_ALIGNMENT); + + // Initialize the params + params.colorA = glm::vec4 { 1.0f, 0.0f, 1.0f, 1.0f }; + params.colorB = glm::vec4 { 0.0f, 1.0f, 0.0f, 1.0f }; + params.offset = 0.0f; + params.scale = 1.0f; + + // Allocate memory for the 
vertex buffer + vertexBuffer = pool_data->allocate(sizeof(Vertex)*NumVertices, alignof(Vertex)); + + // Create the framebuffer resources + createFramebufferResources(); + } + + ~CExample09() + { + // Destroy the framebuffer resources + destroyFramebufferResources(); + + // Destroy the vertex buffer (not strictly needed in this case) + vertexBuffer.destroy(); + + // Destroy the uniform buffer (not strictly needed in this case) + paramsUniformBuffer.destroy(); + } + + void createFramebufferResources() + { + // Create layout for the framebuffers + dk::ImageLayout layout_framebuffer; + dk::ImageLayoutMaker{device} + .setFlags(DkImageFlags_UsageRender | DkImageFlags_UsagePresent | DkImageFlags_HwCompression) + .setFormat(DkImageFormat_RGBA8_Unorm) + .setDimensions(FramebufferWidth, FramebufferHeight) + .initialize(layout_framebuffer); + + // Create the framebuffers + std::array fb_array; + uint64_t fb_size = layout_framebuffer.getSize(); + uint32_t fb_align = layout_framebuffer.getAlignment(); + for (unsigned i = 0; i < NumFramebuffers; i ++) + { + // Allocate a framebuffer + framebuffers_mem[i] = pool_images->allocate(fb_size, fb_align); + framebuffers[i].initialize(layout_framebuffer, framebuffers_mem[i].getMemBlock(), framebuffers_mem[i].getOffset()); + + // Generate a command list that binds it + dk::ImageView colorTarget{ framebuffers[i] }; + cmdbuf.bindRenderTargets(&colorTarget); + framebuffer_cmdlists[i] = cmdbuf.finishList(); + + // Fill in the array for use later by the swapchain creation code + fb_array[i] = &framebuffers[i]; + } + + // Create the swapchain using the framebuffers + swapchain = dk::SwapchainMaker{device, nwindowGetDefault(), fb_array}.create(); + + // Generate the main rendering cmdlist + recordStaticCommands(); + } + + void destroyFramebufferResources() + { + // Return early if we have nothing to destroy + if (!swapchain) return; + + // Make sure the queue is idle before destroying anything + queue.waitIdle(); + + // Clear the static cmdbuf, 
destroying the static cmdlists in the process + cmdbuf.clear(); + + // Destroy the swapchain + swapchain.destroy(); + + // Destroy the framebuffers + for (unsigned i = 0; i < NumFramebuffers; i ++) + framebuffers_mem[i].destroy(); + } + + void recordStaticCommands() + { + // Bind state required for running the compute job + cmdbuf.bindShaders(DkStageFlag_Compute, { computeShader }); + cmdbuf.bindUniformBuffer(DkStage_Compute, 0, paramsUniformBuffer.getGpuAddr(), paramsUniformBuffer.getSize()); + cmdbuf.bindStorageBuffer(DkStage_Compute, 0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + + // Run the compute job + cmdbuf.dispatchCompute(NumVertices/32, 1, 1); + + // Place a barrier + cmdbuf.barrier(DkBarrier_Primitives, 0); + + // Finish off this command list + compute_cmdlist = cmdbuf.finishList(); + + // Initialize state structs with deko3d defaults + dk::RasterizerState rasterizerState; + dk::ColorState colorState; + dk::ColorWriteState colorWriteState; + dk::BlendState blendState; + + // Configure rasterizer state: enable polygon smoothing + rasterizerState.setPolygonSmoothEnable(true); + + // Configure color state: enable blending (needed for polygon smoothing since it generates alpha values) + colorState.setBlendEnable(0, true); + + // Configure viewport and scissor + cmdbuf.setViewports(0, { { 0.0f, 0.0f, FramebufferWidth, FramebufferHeight, 0.0f, 1.0f } }); + cmdbuf.setScissors(0, { { 0, 0, FramebufferWidth, FramebufferHeight } }); + + // Clear the color buffer + cmdbuf.clearColor(0, DkColorMask_RGBA, 0.0f, 0.0f, 0.0f, 0.0f); + + // Bind state required for drawing the triangle + cmdbuf.bindShaders(DkStageFlag_GraphicsMask, { vertexShader, fragmentShader }); + cmdbuf.bindRasterizerState(rasterizerState); + cmdbuf.bindColorState(colorState); + cmdbuf.bindColorWriteState(colorWriteState); + cmdbuf.bindBlendStates(0, blendState); + cmdbuf.bindVtxBuffer(0, vertexBuffer.getGpuAddr(), vertexBuffer.getSize()); + cmdbuf.bindVtxAttribState(VertexAttribState); + 
cmdbuf.bindVtxBufferState(VertexBufferState); + cmdbuf.setLineWidth(16.0f); + + // Draw the line + cmdbuf.draw(DkPrimitive_LineStrip, NumVertices, 1, 0, 0); + + // Finish off this command list + render_cmdlist = cmdbuf.finishList(); + } + + void render() + { + // Begin generating the dynamic command list, for commands that need to be sent only this frame specifically + dynmem.begin(dyncmd); + + // Update the uniform buffer with the new state (this data gets inlined in the command list) + dyncmd.pushConstants( + paramsUniformBuffer.getGpuAddr(), paramsUniformBuffer.getSize(), + 0, sizeof(params), ¶ms); + + // Finish off the dynamic command list (which also submits it to the queue) + queue.submitCommands(dynmem.end(dyncmd)); + + // Run the compute command list + queue.submitCommands(compute_cmdlist); + + // Acquire a framebuffer from the swapchain (and wait for it to be available) + int slot = queue.acquireImage(swapchain); + + // Run the command list that attaches said framebuffer to the queue + queue.submitCommands(framebuffer_cmdlists[slot]); + + // Run the main rendering command list + queue.submitCommands(render_cmdlist); + + // Now that we are done rendering, present it to the screen + queue.presentImage(swapchain, slot); + } + + bool onFrame(u64 ns) override + { + hidScanInput(); + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + return false; + + float time = ns / 1000000000.0; // double precision division; followed by implicit cast to single precision + float tau = glm::two_pi(); + + params.offset = fractf(time/4.0f); + + float xx = fractf(time * 135.0f / 60.0f / 2.0f); + params.scale = cosf(xx*tau); + params.colorA.g = powf(fabsf(params.scale), 4.0f); + params.colorB.g = 1.0f - params.colorA.g; + + render(); + return true; + } +}; + +void Example09(void) +{ + CExample09 app; + app.run(); +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.cpp 
b/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.cpp
new file mode 100644
index 0000000..34786a3
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.cpp
@@ -0,0 +1,69 @@
+/*
+** Sample Framework for deko3d Applications
+** CApplication.cpp: Wrapper class containing common application boilerplate
+*/
+#include "CApplication.h"
+
+CApplication::CApplication()
+{
+    appletLockExit();
+    appletSetFocusHandlingMode(AppletFocusHandlingMode_NoSuspend);
+}
+
+CApplication::~CApplication()
+{
+    appletSetFocusHandlingMode(AppletFocusHandlingMode_SuspendHomeSleep); // undo the mode set by the constructor
+    appletUnlockExit(); // pairs with appletLockExit() in the constructor
+}
+
+void CApplication::run() // main loop: dispatches applet messages and calls onFrame() while focused
+{
+    u64 tick_ref = armGetSystemTick(); // reference tick; the time passed to onFrame() is measured from here
+    u64 tick_saved = tick_ref; // tick captured when focus is lost (see FocusStateChanged below)
+    bool focused = appletGetFocusState() == AppletFocusState_Focused;
+
+    onOperationMode(appletGetOperationMode()); // report the initial operation mode before entering the loop
+
+    for (;;)
+    {
+        u32 msg = 0;
+        Result rc = appletGetMessage(&msg);
+        if (R_SUCCEEDED(rc)) // only handle a message when one was retrieved successfully
+        {
+            bool should_close = !appletProcessMessage(msg);
+            if (should_close)
+                return; // appletProcessMessage() returned false: leave run() immediately
+
+            switch (msg)
+            {
+                case AppletMessage_FocusStateChanged:
+                {
+                    bool old_focused = focused;
+                    AppletFocusState state = appletGetFocusState();
+                    focused = state == AppletFocusState_Focused;
+
+                    onFocusState(state); // the hook is invoked even if the focused/unfocused flag did not flip
+                    if (focused == old_focused)
+                        break;
+                    if (focused)
+                    {
+                        appletSetFocusHandlingMode(AppletFocusHandlingMode_NoSuspend);
+                        tick_ref += armGetSystemTick() - tick_saved; // shift the reference so unfocused time is not counted
+                    }
+                    else
+                    {
+                        tick_saved = armGetSystemTick(); // remember when focus was lost
+                        appletSetFocusHandlingMode(AppletFocusHandlingMode_SuspendHomeSleepNotify);
+                    }
+                    break;
+                }
+                case AppletMessage_OperationModeChanged:
+                    onOperationMode(appletGetOperationMode());
+                    break;
+            }
+        }
+
+        if (focused && !onFrame(armTicksToNs(armGetSystemTick() - tick_ref))) // onFrame() runs only while focused; false ends the loop
+            break;
+    }
+}
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.h b/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.h
new file mode 100644
index 0000000..2d80450
--- /dev/null
+++
b/graphics/deko3d/deko_examples/source/SampleFramework/CApplication.h @@ -0,0 +1,38 @@ +/* +** Sample Framework for deko3d Applications +** CApplication.h: Wrapper class containing common application boilerplate +*/ +#pragma once +#include "common.h" + +class CApplication +{ +protected: + virtual void onFocusState(AppletFocusState) { } + virtual void onOperationMode(AppletOperationMode) { } + virtual bool onFrame(u64) { return true; } + +public: + CApplication(); + ~CApplication(); + + void run(); + + static constexpr void chooseFramebufferSize(uint32_t& width, uint32_t& height, AppletOperationMode mode); +}; + +constexpr void CApplication::chooseFramebufferSize(uint32_t& width, uint32_t& height, AppletOperationMode mode) +{ + switch (mode) + { + default: + case AppletOperationMode_Handheld: + width = 1280; + height = 720; + break; + case AppletOperationMode_Docked: + width = 1920; + height = 1080; + break; + } +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CCmdMemRing.h b/graphics/deko3d/deko_examples/source/SampleFramework/CCmdMemRing.h new file mode 100644 index 0000000..3a8e1a0 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CCmdMemRing.h @@ -0,0 +1,57 @@ +/* +** Sample Framework for deko3d Applications +** CCmdMemRing.h: Memory provider class for dynamic command buffers +*/ +#pragma once +#include "common.h" +#include "CMemPool.h" + +template +class CCmdMemRing +{ + static_assert(NumSlices > 0, "Need a non-zero number of slices..."); + CMemPool::Handle m_mem; + unsigned m_curSlice; + dk::Fence m_fences[NumSlices]; +public: + CCmdMemRing() : m_mem{}, m_curSlice{}, m_fences{} { } + ~CCmdMemRing() + { + m_mem.destroy(); + } + + bool allocate(CMemPool& pool, uint32_t sliceSize) + { + sliceSize = (sliceSize + DK_CMDMEM_ALIGNMENT - 1) &~ (DK_CMDMEM_ALIGNMENT - 1); + m_mem = pool.allocate(NumSlices*sliceSize); + return m_mem; + } + + void begin(dk::CmdBuf cmdbuf) + { + // Clear/reset the command buffer, which also 
destroys all command list handles + // (but remember: it does *not* in fact destroy the command data) + cmdbuf.clear(); + + // Wait for the current slice of memory to be available, and feed it to the command buffer + uint32_t sliceSize = m_mem.getSize() / NumSlices; + m_fences[m_curSlice].wait(); + + // Feed the memory to the command buffer + cmdbuf.addMemory(m_mem.getMemBlock(), m_mem.getOffset() + m_curSlice * sliceSize, sliceSize); + } + + DkCmdList end(dk::CmdBuf cmdbuf) + { + // Signal the fence corresponding to the current slice; so that in the future when we want + // to use it again, we can wait for the completion of the commands we've just submitted + // (and as such we don't overwrite in-flight command data with new one) + cmdbuf.signalFence(m_fences[m_curSlice]); + + // Advance the current slice counter; wrapping around when we reach the end + m_curSlice = (m_curSlice + 1) % NumSlices; + + // Finish off the command list, returning it to the caller + return cmdbuf.finishList(); + } +}; diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CDescriptorSet.h b/graphics/deko3d/deko_examples/source/SampleFramework/CDescriptorSet.h new file mode 100644 index 0000000..a1c0ed9 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CDescriptorSet.h @@ -0,0 +1,71 @@ +/* +** Sample Framework for deko3d Applications +** CDescriptorSet.h: Image/Sampler descriptor set class +*/ +#pragma once +#include "common.h" +#include "CMemPool.h" + +template +class CDescriptorSet +{ + static_assert(NumDescriptors > 0, "Need a non-zero number of descriptors..."); + static_assert(sizeof(DkImageDescriptor) == sizeof(DkSamplerDescriptor), "shouldn't happen"); + static_assert(DK_IMAGE_DESCRIPTOR_ALIGNMENT == DK_SAMPLER_DESCRIPTOR_ALIGNMENT, "shouldn't happen"); + static constexpr size_t DescriptorSize = sizeof(DkImageDescriptor); + static constexpr size_t DescriptorAlign = DK_IMAGE_DESCRIPTOR_ALIGNMENT; + + CMemPool::Handle m_mem; +public: + 
CDescriptorSet() : m_mem{} { } + ~CDescriptorSet() + { + m_mem.destroy(); + } + + bool allocate(CMemPool& pool) + { + m_mem = pool.allocate(NumDescriptors*DescriptorSize, DescriptorAlign); + return m_mem; + } + + void bindForImages(dk::CmdBuf cmdbuf) + { + cmdbuf.bindImageDescriptorSet(m_mem.getGpuAddr(), NumDescriptors); + } + + void bindForSamplers(dk::CmdBuf cmdbuf) + { + cmdbuf.bindSamplerDescriptorSet(m_mem.getGpuAddr(), NumDescriptors); + } + + template + void update(dk::CmdBuf cmdbuf, uint32_t id, T const& descriptor) + { + static_assert(sizeof(T) == DescriptorSize); + cmdbuf.pushData(m_mem.getGpuAddr() + id*DescriptorSize, &descriptor, DescriptorSize); + } + + template + void update(dk::CmdBuf cmdbuf, uint32_t id, std::array const& descriptors) + { + static_assert(sizeof(T) == DescriptorSize); + cmdbuf.pushData(m_mem.getGpuAddr() + id*DescriptorSize, descriptors.data(), descriptors.size()*DescriptorSize); + } + +#ifdef DK_HPP_SUPPORT_VECTOR + template > + void update(dk::CmdBuf cmdbuf, uint32_t id, std::vector const& descriptors) + { + static_assert(sizeof(T) == DescriptorSize); + cmdbuf.pushData(m_mem.getGpuAddr() + id*DescriptorSize, descriptors.data(), descriptors.size()*DescriptorSize); + } +#endif + + template + void update(dk::CmdBuf cmdbuf, uint32_t id, std::initializer_list const& descriptors) + { + static_assert(sizeof(T) == DescriptorSize); + cmdbuf.pushData(m_mem.getGpuAddr() + id*DescriptorSize, descriptors.data(), descriptors.size()*DescriptorSize); + } +}; diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.cpp b/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.cpp new file mode 100644 index 0000000..37b6a26 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.cpp @@ -0,0 +1,37 @@ +/* +** Sample Framework for deko3d Applications +** CExternalImage.cpp: Utility class for loading images from the filesystem +*/ +#include "CExternalImage.h" +#include "FileLoader.h" 
+ +bool CExternalImage::load(CMemPool& imagePool, CMemPool& scratchPool, dk::Device device, dk::Queue transferQueue, const char* path, uint32_t width, uint32_t height, DkImageFormat format, uint32_t flags) +{ + CMemPool::Handle tempimgmem = LoadFile(scratchPool, path, DK_IMAGE_LINEAR_STRIDE_ALIGNMENT); + if (!tempimgmem) + return false; + + dk::UniqueCmdBuf tempcmdbuf = dk::CmdBufMaker{device}.create(); + CMemPool::Handle tempcmdmem = scratchPool.allocate(DK_MEMBLOCK_ALIGNMENT); + tempcmdbuf.addMemory(tempcmdmem.getMemBlock(), tempcmdmem.getOffset(), tempcmdmem.getSize()); + + dk::ImageLayout layout; + dk::ImageLayoutMaker{device} + .setFlags(flags) + .setFormat(format) + .setDimensions(width, height) + .initialize(layout); + + m_mem = imagePool.allocate(layout.getSize(), layout.getAlignment()); + m_image.initialize(layout, m_mem.getMemBlock(), m_mem.getOffset()); + m_descriptor.initialize(m_image); + + dk::ImageView imageView{m_image}; + tempcmdbuf.copyBufferToImage({ tempimgmem.getGpuAddr() }, imageView, { 0, 0, 0, width, height, 1 }); + transferQueue.submitCommands(tempcmdbuf.finishList()); + transferQueue.waitIdle(); + + tempcmdmem.destroy(); + tempimgmem.destroy(); + return true; +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.h b/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.h new file mode 100644 index 0000000..230e2e9 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CExternalImage.h @@ -0,0 +1,37 @@ +/* +** Sample Framework for deko3d Applications +** CExternalImage.h: Utility class for loading images from the filesystem +*/ +#pragma once +#include "common.h" +#include "CMemPool.h" + +class CExternalImage +{ + dk::Image m_image; + dk::ImageDescriptor m_descriptor; + CMemPool::Handle m_mem; +public: + CExternalImage() : m_image{}, m_descriptor{}, m_mem{} { } + ~CExternalImage() + { + m_mem.destroy(); + } + + constexpr operator bool() const + { + return m_mem; + } + + constexpr 
dk::Image& get() + { + return m_image; + } + + constexpr dk::ImageDescriptor const& getDescriptor() const + { + return m_descriptor; + } + + bool load(CMemPool& imagePool, CMemPool& scratchPool, dk::Device device, dk::Queue transferQueue, const char* path, uint32_t width, uint32_t height, DkImageFormat format, uint32_t flags = 0); +}; diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveList.h b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveList.h new file mode 100644 index 0000000..73eb5c8 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveList.h @@ -0,0 +1,119 @@ +/* +** Sample Framework for deko3d Applications +** CIntrusiveList.h: Intrusive doubly-linked list helper class +*/ +#pragma once +#include "common.h" + +template +struct CIntrusiveListNode +{ + T *m_next, *m_prev; + + constexpr CIntrusiveListNode() : m_next{}, m_prev{} { } + constexpr operator bool() const { return m_next || m_prev; } +}; + +template T::* node_ptr> +class CIntrusiveList +{ + T *m_first, *m_last; + +public: + constexpr CIntrusiveList() : m_first{}, m_last{} { } + + constexpr T* first() const { return m_first; } + constexpr T* last() const { return m_last; } + constexpr bool empty() const { return !m_first; } + constexpr void clear() { m_first = m_last = nullptr; } + + constexpr bool isLinked(T* obj) const { return obj->*node_ptr || m_first == obj; } + constexpr T* prev(T* obj) const { return (obj->*node_ptr).m_prev; } + constexpr T* next(T* obj) const { return (obj->*node_ptr).m_next; } + + void add(T* obj) + { + return addBefore(nullptr, obj); + } + + void addBefore(T* pos, T* obj) + { + auto& node = obj->*node_ptr; + node.m_next = pos; + node.m_prev = pos ? 
(pos->*node_ptr).m_prev : m_last; + + if (pos) + (pos->*node_ptr).m_prev = obj; + else + m_last = obj; + + if (node.m_prev) + (node.m_prev->*node_ptr).m_next = obj; + else + m_first = obj; + } + + void addAfter(T* pos, T* obj) + { + auto& node = obj->*node_ptr; + node.m_next = pos ? (pos->*node_ptr).m_next : m_first; + node.m_prev = pos; + + if (pos) + (pos->*node_ptr).m_next = obj; + else + m_first = obj; + + if (node.m_next) + (node.m_next->*node_ptr).m_prev = obj; + else + m_last = obj; + } + + T* pop() + { + T* ret = m_first; + if (ret) + { + m_first = (ret->*node_ptr).m_next; + if (m_first) + (m_first->*node_ptr).m_prev = nullptr; + else + m_last = nullptr; + } + return ret; + } + + void remove(T* obj) + { + auto& node = obj->*node_ptr; + if (node.m_prev) + { + (node.m_prev->*node_ptr).m_next = node.m_next; + if (node.m_next) + (node.m_next->*node_ptr).m_prev = node.m_prev; + else + m_last = node.m_prev; + } else + { + m_first = node.m_next; + if (m_first) + (m_first->*node_ptr).m_prev = nullptr; + else + m_last = nullptr; + } + + node.m_next = node.m_prev = 0; + } + + template + void iterate(L lambda) const + { + T* next = nullptr; + for (T* cur = m_first; cur; cur = next) + { + next = (cur->*node_ptr).m_next; + lambda(cur); + } + } +}; diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.cpp b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.cpp new file mode 100644 index 0000000..9f21b63 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.cpp @@ -0,0 +1,214 @@ +/* +** Sample Framework for deko3d Applications +** CIntrusiveTree.cpp: Intrusive red-black tree helper class +*/ +#include "CIntrusiveTree.h" + +// This red-black tree implementation is mostly based on mtheall's work, +// which can be found here: +// https://github.com/smealum/ctrulib/tree/master/libctru/source/util/rbtree + +void CIntrusiveTreeBase::rotate(N* node, N::Leaf leaf) +{ + N *tmp = node->child(leaf); + N 
*parent = node->getParent(); + + node->child(leaf) = tmp->child(!leaf); + if (tmp->child(!leaf)) + tmp->child(!leaf)->setParent(node); + + tmp->child(!leaf) = node; + tmp->setParent(parent); + + if (parent) + { + if (node == parent->child(!leaf)) + parent->child(!leaf) = tmp; + else + parent->child(leaf) = tmp; + } + else + m_root = tmp; + + node->setParent(tmp); +} + +void CIntrusiveTreeBase::recolor(N* parent, N* node) +{ + N *sibling; + + while ((!node || node->isBlack()) && node != m_root) + { + N::Leaf leaf = node == parent->left() ? N::Right : N::Left; + sibling = parent->child(leaf); + + if (sibling->isRed()) + { + sibling->setBlack(); + parent->setRed(); + rotate(parent, leaf); + sibling = parent->child(leaf); + } + + N::Color clr[2]; + clr[N::Left] = sibling->left() ? sibling->left()->getColor() : N::Black; + clr[N::Right] = sibling->right() ? sibling->right()->getColor() : N::Black; + + if (clr[N::Left] == N::Black && clr[N::Right] == N::Black) + { + sibling->setRed(); + node = parent; + parent = node->getParent(); + } + else + { + if (clr[leaf] == N::Black) + { + sibling->child(!leaf)->setBlack(); + sibling->setRed(); + rotate(sibling, !leaf); + sibling = parent->child(leaf); + } + + sibling->setColor(parent->getColor()); + parent->setBlack(); + sibling->child(leaf)->setBlack(); + rotate(parent, leaf); + + node = m_root; + } + } + + if (node) + node->setBlack(); +} + +auto CIntrusiveTreeBase::walk(N* node, N::Leaf leaf) const -> N* +{ + if (node->child(leaf)) + { + node = node->child(leaf); + while (node->child(!leaf)) + node = node->child(!leaf); + } + else + { + N *parent = node->getParent(); + while (parent && node == parent->child(leaf)) + { + node = parent; + parent = node->getParent(); + } + node = parent; + } + + return node; +} + +void CIntrusiveTreeBase::insert(N* node, N* parent) +{ + node->left() = node->right() = nullptr; + node->setParent(parent); + node->setRed(); + + while ((parent = node->getParent()) && parent->isRed()) + { + N 
*grandparent = parent->getParent(); + N::Leaf leaf = parent == grandparent->left() ? N::Right : N::Left; + N *uncle = grandparent->child(leaf); + + if (uncle && uncle->isRed()) + { + uncle->setBlack(); + parent->setBlack(); + grandparent->setRed(); + + node = grandparent; + } + else + { + if (parent->child(leaf) == node) + { + rotate(parent, leaf); + + N* tmp = parent; + parent = node; + node = tmp; + } + + parent->setBlack(); + grandparent->setRed(); + rotate(grandparent, !leaf); + } + } + + m_root->setBlack(); +} + +void CIntrusiveTreeBase::remove(N* node) +{ + N::Color color; + N *child, *parent; + + if (node->left() && node->right()) + { + N *old = node; + + node = node->right(); + while (node->left()) + node = node->left(); + + parent = old->getParent(); + if (parent) + { + if (parent->left() == old) + parent->left() = node; + else + parent->right() = node; + } + else + m_root = node; + + child = node->right(); + parent = node->getParent(); + color = node->getColor(); + + if (parent == old) + parent = node; + else + { + if (child) + child->setParent(parent); + parent->left() = child; + + node->right() = old->right(); + old->right()->setParent(node); + } + + node->setParent(old->getParent()); + node->setColor(old->getColor()); + node->left() = old->left(); + old->left()->setParent(node); + } + else + { + child = node->left() ? 
node->left() : node->right(); // node has at most one child here: keep whichever one exists
+        parent = node->getParent();
+        color = node->getColor();
+
+        if (child)
+            child->setParent(parent);
+        if (parent)
+        {
+            if (parent->left() == node)
+                parent->left() = child;
+            else
+                parent->right() = child;
+        }
+        else
+            m_root = child;
+    }
+
+    if (color == N::Black)
+        recolor(parent, child);
+}
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.h b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.h
new file mode 100644
index 0000000..9796ee6
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/CIntrusiveTree.h
@@ -0,0 +1,250 @@
+/*
+** Sample Framework for deko3d Applications
+** CIntrusiveTree.h: Intrusive red-black tree helper class
+*/
+#pragma once
+#include "common.h"
+
+#include <functional>
+
+struct CIntrusiveTreeNode
+{
+    enum Color
+    {
+        Red,
+        Black,
+    };
+
+    enum Leaf
+    {
+        Left,
+        Right,
+    };
+
+private:
+    uintptr_t m_parent_color; // parent pointer with the node color packed into bit 0
+    CIntrusiveTreeNode* m_children[2]; // indexed by Leaf (Left, Right)
+
+public:
+    constexpr CIntrusiveTreeNode() : m_parent_color{}, m_children{} { }
+
+    constexpr CIntrusiveTreeNode* getParent() const
+    {
+        return reinterpret_cast<CIntrusiveTreeNode*>(m_parent_color &~ 1);
+    }
+
+    void setParent(CIntrusiveTreeNode* parent)
+    {
+        m_parent_color = (m_parent_color & 1) | reinterpret_cast<uintptr_t>(parent);
+    }
+
+    constexpr Color getColor() const
+    {
+        return static_cast<Color>(m_parent_color & 1);
+    }
+
+    void setColor(Color color)
+    {
+        m_parent_color = (m_parent_color &~ 1) | static_cast<uintptr_t>(color);
+    }
+
+    constexpr CIntrusiveTreeNode*& child(Leaf leaf)
+    {
+        return m_children[leaf];
+    }
+
+    constexpr CIntrusiveTreeNode* const& child(Leaf leaf) const
+    {
+        return m_children[leaf];
+    }
+
+    //--------------------------------------
+
+    constexpr bool isRed() const { return getColor() == Red; }
+    constexpr bool isBlack() const { return getColor() == Black; }
+    void setRed() { setColor(Red); }
+    void setBlack() { setColor(Black); }
+
+    constexpr CIntrusiveTreeNode*& left() { return child(Left); }
+    constexpr 
CIntrusiveTreeNode*& right() { return child(Right); } + constexpr CIntrusiveTreeNode* const& left() const { return child(Left); } + constexpr CIntrusiveTreeNode* const& right() const { return child(Right); } +}; + +NX_CONSTEXPR CIntrusiveTreeNode::Leaf operator!(CIntrusiveTreeNode::Leaf val) noexcept +{ + return static_cast(!static_cast(val)); +} + +class CIntrusiveTreeBase +{ + using N = CIntrusiveTreeNode; + + void rotate(N* node, N::Leaf leaf); + void recolor(N* parent, N* node); +protected: + N* m_root; + + constexpr CIntrusiveTreeBase() : m_root{} { } + + N* walk(N* node, N::Leaf leaf) const; + void insert(N* node, N* parent); + void remove(N* node); + + N* minmax(N::Leaf leaf) const + { + N* p = m_root; + if (!p) + return nullptr; + while (p->child(leaf)) + p = p->child(leaf); + return p; + } + + template + N*& navigate(N*& node, N*& parent, N::Leaf leafOnEqual, H helm) const + { + node = nullptr; + parent = nullptr; + + N** point = const_cast(&m_root); + while (*point) + { + int direction = helm(*point); + parent = *point; + if (direction < 0) + point = &(*point)->left(); + else if (direction > 0) + point = &(*point)->right(); + else + { + node = *point; + point = &(*point)->child(leafOnEqual); + } + } + return *point; + } +}; + +template +constexpr ClassT* parent_obj(MemberT* member, MemberT ClassT::* ptr) +{ + union whatever + { + MemberT ClassT::* ptr; + intptr_t offset; + }; + // This is technically UB, but basically every compiler worth using admits it as an extension + return (ClassT*)((intptr_t)member - whatever{ptr}.offset); +} + +template < + typename T, + CIntrusiveTreeNode T::* node_ptr, + typename Comparator = std::less<> +> +class CIntrusiveTree final : protected CIntrusiveTreeBase +{ + using N = CIntrusiveTreeNode; + + static constexpr T* toType(N* m) + { + return m ? parent_obj(m, node_ptr) : nullptr; + } + + static constexpr N* toNode(T* m) + { + return m ? 
&(m->*node_ptr) : nullptr;
+    }
+
+    template <typename A, typename B>
+    static int compare(A const& a, B const& b)
+    {
+        Comparator comp;
+        if (comp(a, b))
+            return -1;
+        if (comp(b, a))
+            return 1;
+        return 0;
+    }
+
+public:
+    constexpr CIntrusiveTree() : CIntrusiveTreeBase{} { }
+
+    T* first() const { return toType(minmax(N::Left)); }
+    T* last() const { return toType(minmax(N::Right)); }
+    bool empty() const { return m_root == nullptr; } // true when the tree holds no nodes
+    void clear() { m_root = nullptr; }
+
+    T* prev(T* node) const { return toType(walk(toNode(node), N::Left)); }
+    T* next(T* node) const { return toType(walk(toNode(node), N::Right)); }
+
+    enum SearchMode
+    {
+        Exact = 0,
+        LowerBound = 1,
+        UpperBound = 2,
+    };
+
+    template <typename Lambda>
+    T* search(SearchMode mode, Lambda lambda) const
+    {
+        N *node, *parent;
+        N*& point = navigate(node, parent,
+            mode != UpperBound ? N::Left : N::Right,
+            [&lambda](N* curnode) { return lambda(toType(curnode)); });
+
+        switch (mode)
+        {
+            default:
+            case Exact:
+                break;
+            case LowerBound:
+                if (!node && parent)
+                {
+                    if (&parent->left() == &point)
+                        node = parent;
+                    else
+                        node = walk(parent, N::Right);
+                }
+                break;
+            case UpperBound:
+                if (node)
+                    node = walk(node, N::Right);
+                else if (parent)
+                {
+                    if (&parent->right() == &point)
+                        node = walk(parent, N::Right);
+                    else
+                        node = parent;
+                }
+                break;
+        }
+        return toType(node);
+    }
+
+    template <typename K>
+    T* find(K const& key, SearchMode mode = Exact) const
+    {
+        return search(mode, [&key](T* obj) { return compare(key, *obj); });
+    }
+
+    T* insert(T* obj, bool allow_dupes = false)
+    {
+        N *node, *parent;
+        N*& point = navigate(node, parent, N::Right,
+            [obj](N* curnode) { return compare(*obj, *toType(curnode)); });
+
+        if (node && !allow_dupes)
+            return toType(node);
+
+        point = toNode(obj);
+        CIntrusiveTreeBase::insert(point, parent);
+        return obj;
+    }
+
+    void remove(T* obj)
+    {
+        CIntrusiveTreeBase::remove(toNode(obj));
+    }
+};
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.cpp 
b/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.cpp
new file mode 100644
index 0000000..fb3bd10
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.cpp
@@ -0,0 +1,175 @@
+/*
+** Sample Framework for deko3d Applications
+** CMemPool.cpp: Pooled dynamic memory allocation manager class
+*/
+#include "CMemPool.h"
+
+inline auto CMemPool::_newSlice() -> Slice*
+{
+    Slice* ret = m_sliceHeap.pop(); // reuse a recycled slice record before hitting malloc
+    if (!ret) ret = (Slice*)::malloc(sizeof(Slice));
+    return ret;
+}
+
+inline void CMemPool::_deleteSlice(Slice* s)
+{
+    if (!s) return;
+    m_sliceHeap.add(s); // records are recycled here and only freed in the destructor
+}
+
+CMemPool::~CMemPool()
+{
+    m_memMap.iterate([](Slice* s) { ::free(s); });
+    m_sliceHeap.iterate([](Slice* s) { ::free(s); });
+    m_blocks.iterate([](Block* blk) {
+        blk->m_obj.destroy();
+        ::free(blk);
+    });
+}
+
+auto CMemPool::allocate(uint32_t size, uint32_t alignment) -> Handle
+{
+    if (!size) return nullptr;
+    if (!alignment || (alignment & (alignment - 1))) return nullptr; // must be a non-zero power of two; 0 would round size down to 0
+    size = (size + alignment - 1) &~ (alignment - 1);
+#ifdef DEBUG_CMEMPOOL
+    printf("Allocating size=%u alignment=0x%x\n", size, alignment);
+    {
+        Slice* temp = /*m_freeList*/m_memMap.first();
+        while (temp)
+        {
+            printf("-- blk %p | 0x%08x-0x%08x | %s used\n", temp->m_block, temp->m_start, temp->m_end, temp->m_pool ? " " : "not");
+            temp = /*m_freeList*/m_memMap.next(temp);
+        }
+    }
+#endif
+
+    uint32_t start_offset = 0;
+    uint32_t end_offset = 0;
+    Slice* slice = m_freeList.find(size, decltype(m_freeList)::LowerBound);
+    while (slice)
+    {
+#ifdef DEBUG_CMEMPOOL
+        printf(" * Checking slice 0x%x - 0x%x\n", slice->m_start, slice->m_end);
+#endif
+        start_offset = (slice->m_start + alignment - 1) &~ (alignment - 1);
+        end_offset = start_offset + size;
+        if (end_offset <= slice->m_end)
+            break;
+        slice = m_freeList.next(slice);
+    }
+
+    if (!slice)
+    {
+        Block* blk = (Block*)::malloc(sizeof(Block));
+        if (!blk)
+            return nullptr;
+
+        uint32_t unusableSize = (m_flags & DkMemBlockFlags_Code) ? 
DK_SHADER_CODE_UNUSABLE_SIZE : 0; + uint32_t blkSize = m_blockSize - unusableSize; + blkSize = size > blkSize ? size : blkSize; + blkSize = (blkSize + unusableSize + DK_MEMBLOCK_ALIGNMENT - 1) &~ (DK_MEMBLOCK_ALIGNMENT - 1); +#ifdef DEBUG_CMEMPOOL + printf(" ! Allocating block of size 0x%x\n", blkSize); +#endif + blk->m_obj = dk::MemBlockMaker{m_dev, blkSize}.setFlags(m_flags).create(); + if (!blk->m_obj) + { + ::free(blk); + return nullptr; + } + + slice = _newSlice(); + if (!slice) + { + blk->m_obj.destroy(); + ::free(blk); + return nullptr; + } + + slice->m_pool = nullptr; + slice->m_block = blk; + slice->m_start = 0; + slice->m_end = blkSize - unusableSize; + m_memMap.add(slice); + + blk->m_cpuAddr = blk->m_obj.getCpuAddr(); + blk->m_gpuAddr = blk->m_obj.getGpuAddr(); + m_blocks.add(blk); + + start_offset = 0; + end_offset = size; + } + else + { +#ifdef DEBUG_CMEMPOOL + printf(" * found it\n"); +#endif + m_freeList.remove(slice); + } + + if (start_offset != slice->m_start) + { + Slice* t = _newSlice(); + if (!t) goto _bad; + t->m_pool = nullptr; + t->m_block = slice->m_block; + t->m_start = slice->m_start; + t->m_end = start_offset; +#ifdef DEBUG_CMEMPOOL + printf("-> subdivide left: %08x-%08x\n", t->m_start, t->m_end); +#endif + m_memMap.addBefore(slice, t); + m_freeList.insert(t, true); + slice->m_start = start_offset; + } + + if (end_offset != slice->m_end) + { + Slice* t = _newSlice(); + if (!t) goto _bad; + t->m_pool = nullptr; + t->m_block = slice->m_block; + t->m_start = end_offset; + t->m_end = slice->m_end; +#ifdef DEBUG_CMEMPOOL + printf("-> subdivide right: %08x-%08x\n", t->m_start, t->m_end); +#endif + m_memMap.addAfter(slice, t); + m_freeList.insert(t, true); + slice->m_end = end_offset; + } + + slice->m_pool = this; + return slice; + +_bad: + m_freeList.insert(slice, true); + return nullptr; +} + +void CMemPool::_destroy(Slice* slice) +{ + slice->m_pool = nullptr; + + Slice* left = m_memMap.prev(slice); + Slice* right = m_memMap.next(slice); + + 
if (left && left->canCoalesce(*slice)) + { + slice->m_start = left->m_start; + m_freeList.remove(left); + m_memMap.remove(left); + _deleteSlice(left); + } + + if (right && slice->canCoalesce(*right)) + { + slice->m_end = right->m_end; + m_freeList.remove(right); + m_memMap.remove(right); + _deleteSlice(right); + } + + m_freeList.insert(slice, true); +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.h b/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.h new file mode 100644 index 0000000..978755c --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CMemPool.h @@ -0,0 +1,120 @@ +/* +** Sample Framework for deko3d Applications +** CMemPool.h: Pooled dynamic memory allocation manager class +*/ +#pragma once +#include "common.h" +#include "CIntrusiveList.h" +#include "CIntrusiveTree.h" + +class CMemPool +{ + dk::Device m_dev; + uint32_t m_flags; + uint32_t m_blockSize; + + struct Block + { + CIntrusiveListNode m_node; + dk::MemBlock m_obj; + void* m_cpuAddr; + DkGpuAddr m_gpuAddr; + + constexpr void* cpuOffset(uint32_t offset) const + { + return m_cpuAddr ? ((u8*)m_cpuAddr + offset) : nullptr; + } + + constexpr DkGpuAddr gpuOffset(uint32_t offset) const + { + return m_gpuAddr != DK_GPU_ADDR_INVALID ? 
(m_gpuAddr + offset) : DK_GPU_ADDR_INVALID; + } + }; + + CIntrusiveList m_blocks; + + struct Slice + { + CIntrusiveListNode m_node; + CIntrusiveTreeNode m_treenode; + CMemPool* m_pool; + Block* m_block; + uint32_t m_start; + uint32_t m_end; + + constexpr uint32_t getSize() const { return m_end - m_start; } + constexpr bool canCoalesce(Slice const& rhs) const { return m_pool == rhs.m_pool && m_block == rhs.m_block && m_end == rhs.m_start; } + + constexpr bool operator<(Slice const& rhs) const { return getSize() < rhs.getSize(); } + constexpr bool operator<(uint32_t rhs) const { return getSize() < rhs; } + }; + + friend constexpr bool operator<(uint32_t lhs, Slice const& rhs); + + CIntrusiveList m_memMap, m_sliceHeap; + CIntrusiveTree m_freeList; + + Slice* _newSlice(); + void _deleteSlice(Slice*); + + void _destroy(Slice* slice); + +public: + static constexpr uint32_t DefaultBlockSize = 0x800000; + class Handle + { + Slice* m_slice; + public: + constexpr Handle(Slice* slice = nullptr) : m_slice{slice} { } + constexpr operator bool() const { return m_slice != nullptr; } + constexpr operator Slice*() const { return m_slice; } + constexpr bool operator!() const { return !m_slice; } + constexpr bool operator==(Handle const& rhs) const { return m_slice == rhs.m_slice; } + constexpr bool operator!=(Handle const& rhs) const { return m_slice != rhs.m_slice; } + + void destroy() + { + if (m_slice) + { + m_slice->m_pool->_destroy(m_slice); + m_slice = nullptr; + } + } + + constexpr dk::MemBlock getMemBlock() const + { + return m_slice->m_block->m_obj; + } + + constexpr uint32_t getOffset() const + { + return m_slice->m_start; + } + + constexpr uint32_t getSize() const + { + return m_slice->getSize(); + } + + constexpr void* getCpuAddr() const + { + return m_slice->m_block->cpuOffset(m_slice->m_start); + } + + constexpr DkGpuAddr getGpuAddr() const + { + return m_slice->m_block->gpuOffset(m_slice->m_start); + } + }; + + CMemPool(dk::Device dev, uint32_t flags = 
DkMemBlockFlags_CpuUncached | DkMemBlockFlags_GpuCached, uint32_t blockSize = DefaultBlockSize) : + m_dev{dev}, m_flags{flags}, m_blockSize{blockSize}, m_blocks{}, m_memMap{}, m_sliceHeap{}, m_freeList{} { } + ~CMemPool(); + + Handle allocate(uint32_t size, uint32_t alignment = DK_CMDMEM_ALIGNMENT); +}; + +constexpr bool operator<(uint32_t lhs, CMemPool::Slice const& rhs) +{ + return lhs < rhs.getSize(); +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CShader.cpp b/graphics/deko3d/deko_examples/source/SampleFramework/CShader.cpp new file mode 100644 index 0000000..6c5361c --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/CShader.cpp @@ -0,0 +1,62 @@ +/* +** Sample Framework for deko3d Applications +** CShader.cpp: Utility class for loading shaders from the filesystem +*/ +#include "CShader.h" + +struct DkshHeader +{ + uint32_t magic; // DKSH_MAGIC + uint32_t header_sz; // sizeof(DkshHeader) + uint32_t control_sz; + uint32_t code_sz; + uint32_t programs_off; + uint32_t num_programs; +}; + +bool CShader::load(CMemPool& pool, const char* path) +{ + FILE* f; + DkshHeader hdr; + void* ctrlmem; + + m_codemem.destroy(); + + f = fopen(path, "rb"); + if (!f) return false; + + if (!fread(&hdr, sizeof(hdr), 1, f)) + goto _fail0; + + ctrlmem = malloc(hdr.control_sz); + if (!ctrlmem) + goto _fail0; + + rewind(f); + if (!fread(ctrlmem, hdr.control_sz, 1, f)) + goto _fail1; + + m_codemem = pool.allocate(hdr.code_sz, DK_SHADER_CODE_ALIGNMENT); + if (!m_codemem) + goto _fail1; + + if (!fread(m_codemem.getCpuAddr(), hdr.code_sz, 1, f)) + goto _fail2; + + dk::ShaderMaker{m_codemem.getMemBlock(), m_codemem.getOffset()} + .setControl(ctrlmem) + .setProgramId(0) + .initialize(m_shader); + + free(ctrlmem); + fclose(f); + return true; + +_fail2: + m_codemem.destroy(); +_fail1: + free(ctrlmem); +_fail0: + fclose(f); + return false; +} diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/CShader.h 
b/graphics/deko3d/deko_examples/source/SampleFramework/CShader.h
new file mode 100644
index 0000000..b39dfe0
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/CShader.h
@@ -0,0 +1,31 @@
+/*
+** Sample Framework for deko3d Applications
+** CShader.h: Utility class for loading shaders from the filesystem
+*/
+#pragma once
+#include "common.h"
+#include "CMemPool.h"
+
+class CShader
+{
+    dk::Shader m_shader;
+    CMemPool::Handle m_codemem;
+public:
+    CShader() : m_shader{}, m_codemem{} { }
+    ~CShader()
+    {
+        m_codemem.destroy();
+    }
+
+    constexpr operator bool() const
+    {
+        return m_codemem;
+    }
+
+    constexpr operator dk::Shader const*() const
+    {
+        return &m_shader;
+    }
+
+    bool load(CMemPool& pool, const char* path);
+};
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.cpp b/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.cpp
new file mode 100644
index 0000000..a9651bf
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.cpp
@@ -0,0 +1,35 @@
+/*
+** Sample Framework for deko3d Applications
+** FileLoader.cpp: Helpers for loading data from the filesystem directly into GPU memory
+*/
+#include "FileLoader.h"
+
+CMemPool::Handle LoadFile(CMemPool& pool, const char* path, uint32_t alignment)
+{
+    FILE *f = fopen(path, "rb");
+    if (!f) return nullptr;
+
+    // Measure the file; give up if seeking fails, the file is empty, or its size does not fit in 32 bits
+    long fsize = -1;
+    if (fseek(f, 0, SEEK_END) == 0)
+        fsize = ftell(f);
+    if (fsize <= 0 || static_cast<unsigned long>(fsize) > UINT32_MAX || fseek(f, 0, SEEK_SET) != 0)
+    {
+        fclose(f);
+        return nullptr;
+    }
+
+    CMemPool::Handle mem = pool.allocate(static_cast<uint32_t>(fsize), alignment);
+    if (!mem)
+    {
+        fclose(f);
+        return nullptr;
+    }
+
+    // A short read would leave uninitialized bytes in the allocation: release it and return a null handle
+    if (fread(mem.getCpuAddr(), static_cast<size_t>(fsize), 1, f) != 1)
+        mem.destroy();
+    fclose(f);
+
+    return mem;
+}
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.h b/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.h
new file mode 100644
index 0000000..3455c87
--- /dev/null
+++ b/graphics/deko3d/deko_examples/source/SampleFramework/FileLoader.h
@@ -0,0 +1,9 @@
+/*
+** Sample Framework for deko3d Applications
+** FileLoader.h: Helpers for loading 
data from the filesystem directly into GPU memory +*/ +#pragma once +#include "common.h" +#include "CMemPool.h" + +CMemPool::Handle LoadFile(CMemPool& pool, const char* path, uint32_t alignment = DK_CMDMEM_ALIGNMENT); diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/LICENSE b/graphics/deko3d/deko_examples/source/SampleFramework/LICENSE new file mode 100644 index 0000000..183debc --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/LICENSE @@ -0,0 +1,18 @@ +Copyright (C) 2020 fincs + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any +damages arising from the use of this software. + +Permission is granted to anyone to use this software for any +purpose, including commercial applications, and to alter it and +redistribute it freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you + must not claim that you wrote the original software. If you use + this software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and + must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source + distribution. 
diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/common.h b/graphics/deko3d/deko_examples/source/SampleFramework/common.h new file mode 100644 index 0000000..814e499 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/common.h @@ -0,0 +1,12 @@ +/* +** Sample Framework for deko3d Applications +** common.h: Common includes +*/ +#pragma once +#include +#include +#include + +#include + +#include diff --git a/graphics/deko3d/deko_examples/source/SampleFramework/startup.cpp b/graphics/deko3d/deko_examples/source/SampleFramework/startup.cpp new file mode 100644 index 0000000..fd2b522 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/SampleFramework/startup.cpp @@ -0,0 +1,34 @@ +/* +** Sample Framework for deko3d Applications +** startup.cpp: Automatic initialization/deinitialization +*/ +#include "common.h" + +//#define DEBUG_NXLINK + +#ifdef DEBUG_NXLINK +static int nxlink_sock = -1; +#endif + +extern "C" void userAppInit(void) +{ + Result res = romfsInit(); + if (R_FAILED(res)) + fatalThrow(res); + +#ifdef DEBUG_NXLINK + socketInitializeDefault(); + nxlink_sock = nxlinkStdio(); +#endif +} + +extern "C" void userAppExit(void) +{ +#ifdef DEBUG_NXLINK + if (nxlink_sock != -1) + close(nxlink_sock); + socketExit(); +#endif + + romfsExit(); +} diff --git a/graphics/deko3d/deko_examples/source/basic_deferred_fsh.glsl b/graphics/deko3d/deko_examples/source/basic_deferred_fsh.glsl new file mode 100644 index 0000000..3a45131 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/basic_deferred_fsh.glsl @@ -0,0 +1,15 @@ +#version 460 + +layout (location = 0) in vec3 inWorldPos; +layout (location = 1) in vec3 inNormal; + +layout (location = 0) out vec4 outAlbedo; +layout (location = 1) out vec4 outNormal; +layout (location = 2) out vec4 outViewDir; + +void main() +{ + outAlbedo = vec4(1.0, 1.0, 1.0, 1.0); + outNormal = vec4(normalize(inNormal), 0.0); + outViewDir = vec4(-inWorldPos, 0.0); +} diff --git 
a/graphics/deko3d/deko_examples/source/basic_lighting_fsh.glsl b/graphics/deko3d/deko_examples/source/basic_lighting_fsh.glsl new file mode 100644 index 0000000..e95b687 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/basic_lighting_fsh.glsl @@ -0,0 +1,46 @@ +#version 460 + +layout (location = 0) in vec3 inWorldPos; +layout (location = 1) in vec3 inNormal; +layout (location = 0) out vec4 outColor; + +layout (std140, binding = 0) uniform Lighting +{ + vec4 lightPos; // if w=0 this is lightDir + vec3 ambient; + vec3 diffuse; + vec4 specular; // w is shininess +} u; + +void main() +{ + // Renormalize the normal after interpolation + vec3 normal = normalize(inNormal); + + // Calculate light direction (i.e. vector that points *towards* the light source) + vec3 lightDir; + if (u.lightPos.w != 0.0) + lightDir = normalize(u.lightPos.xyz - inWorldPos); + else + lightDir = -u.lightPos.xyz; + vec3 viewDir = normalize(-inWorldPos); + + // Calculate diffuse factor + float diffuse = max(0.0, dot(normal,lightDir)); + + // Calculate specular factor (Blinn-Phong) + vec3 halfwayDir = normalize(lightDir + viewDir); + float specular = pow(max(0.0, dot(normal,halfwayDir)), u.specular.w); + + // Calculate the color + vec3 color = + u.ambient + + u.diffuse*vec3(diffuse) + + u.specular.xyz*vec3(specular); + + // Reinhard tone mapping + vec3 mappedColor = color / (vec3(1.0) + color); + + // Output this color (no need to gamma adjust since the framebuffer is sRGB) + outColor = vec4(mappedColor, 1.0); +} diff --git a/graphics/deko3d/deko_examples/source/basic_vsh.glsl b/graphics/deko3d/deko_examples/source/basic_vsh.glsl new file mode 100644 index 0000000..b569486 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/basic_vsh.glsl @@ -0,0 +1,12 @@ +#version 460 + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec4 inAttrib; + +layout (location = 0) out vec4 outAttrib; + +void main() +{ + gl_Position = vec4(inPos, 1.0); + outAttrib = inAttrib; +} diff --git 
a/graphics/deko3d/deko_examples/source/color_fsh.glsl b/graphics/deko3d/deko_examples/source/color_fsh.glsl new file mode 100644 index 0000000..4fb790c --- /dev/null +++ b/graphics/deko3d/deko_examples/source/color_fsh.glsl @@ -0,0 +1,9 @@ +#version 460 + +layout (location = 0) in vec3 inColor; +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = vec4(inColor, 1.0); +} diff --git a/graphics/deko3d/deko_examples/source/composition_fsh.glsl b/graphics/deko3d/deko_examples/source/composition_fsh.glsl new file mode 100644 index 0000000..741a493 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/composition_fsh.glsl @@ -0,0 +1,53 @@ +#version 460 + +layout (location = 0) out vec4 outColor; + +layout (binding = 0) uniform sampler2D texAlbedo; +layout (binding = 1) uniform sampler2D texNormal; +layout (binding = 2) uniform sampler2D texViewDir; + +layout (std140, binding = 0) uniform Lighting +{ + vec4 lightPos; // if w=0 this is lightDir + vec3 ambient; + vec3 diffuse; + vec4 specular; // w is shininess +} u; + +void main() +{ + // Uncomment the coordinate reversion below to observe the effects of tiled corruption + ivec2 coord = /*textureSize(texAlbedo, 0) - ivec2(1,1) -*/ ivec2(gl_FragCoord.xy); + + // Retrieve values from the g-buffer + vec4 albedo = texelFetch(texAlbedo, coord, 0); + vec3 normal = texelFetch(texNormal, coord, 0).xyz; + vec3 viewDir = texelFetch(texViewDir, coord, 0).xyz; + + // Calculate light direction (i.e. 
vector that points *towards* the light source) + vec3 lightDir; + if (u.lightPos.w != 0.0) + lightDir = normalize(u.lightPos.xyz + viewDir); + else + lightDir = -u.lightPos.xyz; + viewDir = normalize(viewDir); + + // Calculate diffuse factor + float diffuse = max(0.0, dot(normal,lightDir)); + + // Calculate specular factor (Blinn-Phong) + vec3 halfwayDir = normalize(lightDir + viewDir); + float specular = pow(max(0.0, dot(normal,halfwayDir)), u.specular.w); + + // Calculate the color + vec3 color = + u.ambient + + albedo.rgb*u.diffuse*vec3(diffuse) + + u.specular.xyz*vec3(specular); + + // Reinhard tone mapping + vec3 mappedColor = albedo.a * color / (vec3(1.0) + color); + + // Output this color (no need to gamma adjust since the framebuffer is sRGB) + outColor = vec4(mappedColor, albedo.a); +} diff --git a/graphics/deko3d/deko_examples/source/composition_vsh.glsl b/graphics/deko3d/deko_examples/source/composition_vsh.glsl new file mode 100644 index 0000000..38bcc36 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/composition_vsh.glsl @@ -0,0 +1,24 @@ +#version 460 + +/* +ID | gl_Position.xy +0 | -1.0 +1.0 +1 | -1.0 -1.0 +2 | +1.0 -1.0 +3 | +1.0 +1.0 +*/ + +void main() +{ + if ((gl_VertexID & 2) == 0) + gl_Position.x = -1.0; + else + gl_Position.x = +1.0; + + if (((gl_VertexID+1) & 2) == 0) + gl_Position.y = +1.0; + else + gl_Position.y = -1.0; + + gl_Position.zw = vec2(0.5, 1.0); +} diff --git a/graphics/deko3d/deko_examples/source/main.cpp b/graphics/deko3d/deko_examples/source/main.cpp new file mode 100644 index 0000000..2e68447 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/main.cpp @@ -0,0 +1,137 @@ +/* +** deko3d Examples - Main Menu +*/ + +// Sample Framework headers +#include "SampleFramework/CApplication.h" + +// C++ standard library headers +#include + +void Example01(void); +void Example02(void); +void Example03(void); +void Example04(void); +void Example05(void); +void Example06(void); +void Example07(void); +void Example08(void); 
+void Example09(void); + +namespace +{ + using ExampleFunc = void(*)(void); + struct Example + { + ExampleFunc mainfunc; + const char* name; + }; + + constexpr std::array Examples = + { + Example{ Example01, "01: Simple Setup" }, + Example{ Example02, "02: Triangle" }, + Example{ Example03, "03: Cube" }, + Example{ Example04, "04: Textured Cube" }, + Example{ Example05, "05: Simple Tessellation" }, + Example{ Example06, "06: Simple Multisampling" }, + Example{ Example07, "07: Mesh Loading and Lighting (sRGB)" }, + Example{ Example08, "08: Deferred Shading (Multipass Rendering with Tiled Cache)" }, + Example{ Example09, "09: Simple Compute Shader (Geometry Generation)" }, + }; +} + +class CMainMenu final : public CApplication +{ + static constexpr unsigned EntriesPerScreen = 39; + static constexpr unsigned EntryPageLength = 10; + + int screenPos; + int selectPos; + + void renderMenu() + { + printf("\x1b[2J\n"); + printf(" deko3d Examples\n"); + printf(" Press PLUS(+) to exit; A to select an example to run\n"); + printf("\n"); + printf("--------------------------------------------------------------------------------"); + printf("\n"); + + for (unsigned i = 0; i < (Examples.size() - screenPos) && i < EntriesPerScreen; i ++) + { + unsigned id = screenPos+i; + printf(" %c %s\n", id==unsigned(selectPos) ? 
'*' : ' ', Examples[id].name); + } + } + + CMainMenu() : screenPos{}, selectPos{} + { + consoleInit(NULL); + renderMenu(); + } + + ~CMainMenu() + { + consoleExit(NULL); + } + + bool onFrame(u64 ns) override + { + int oldPos = selectPos; + hidScanInput(); + + u64 kDown = hidKeysDown(CONTROLLER_P1_AUTO); + if (kDown & KEY_PLUS) + { + selectPos = -1; + return false; + } + if (kDown & KEY_A) + return false; + if (kDown & KEY_UP) + selectPos -= 1; + if (kDown & KEY_DOWN) + selectPos += 1; + if (kDown & KEY_LEFT) + selectPos -= EntryPageLength; + if (kDown & KEY_RIGHT) + selectPos += EntryPageLength; + + if (selectPos < 0) + selectPos = 0; + if (unsigned(selectPos) >= Examples.size()) + selectPos = Examples.size()-1; + + if (selectPos != oldPos) + { + if (selectPos < screenPos) + screenPos = selectPos; + else if (selectPos >= screenPos + int(EntriesPerScreen)) + screenPos = selectPos - EntriesPerScreen + 1; + renderMenu(); + } + + consoleUpdate(NULL); + return true; + } + +public: + static ExampleFunc Display() + { + CMainMenu app; + app.run(); + return app.selectPos >= 0 ? 
Examples[app.selectPos].mainfunc : nullptr; + } +}; + +int main(int argc, char* argv[]) +{ + for (;;) + { + ExampleFunc func = CMainMenu::Display(); + if (!func) break; + func(); + } + return 0; +} diff --git a/graphics/deko3d/deko_examples/source/sinewave.glsl b/graphics/deko3d/deko_examples/source/sinewave.glsl new file mode 100644 index 0000000..174cd7b --- /dev/null +++ b/graphics/deko3d/deko_examples/source/sinewave.glsl @@ -0,0 +1,38 @@ +#version 460 + +layout (local_size_x = 32) in; + +struct Vertex +{ + vec4 position; + vec4 color; +}; + +layout (std140, binding = 0) uniform Params +{ + vec4 colorA; + vec4 colorB; + float offset; + float scale; +} u; + +layout (std430, binding = 0) buffer Output +{ + Vertex vertices[]; +} o; + +const float TAU = 6.2831853071795; + +void calcVertex(out Vertex vtx, float x) +{ + vtx.position = vec4(x * 2.0 - 1.0, u.scale * sin((u.offset + x)*TAU), 0.5, 1.0); + vtx.color = mix(u.colorA, u.colorB, x); +} + +void main() +{ + uint id = gl_GlobalInvocationID.x; + uint maxid = gl_WorkGroupSize.x * gl_NumWorkGroups.x - 1; + float x = float(id) / float(maxid); + calcVertex(o.vertices[id], x); +} diff --git a/graphics/deko3d/deko_examples/source/tess_simple_tcsh.glsl b/graphics/deko3d/deko_examples/source/tess_simple_tcsh.glsl new file mode 100644 index 0000000..44a822d --- /dev/null +++ b/graphics/deko3d/deko_examples/source/tess_simple_tcsh.glsl @@ -0,0 +1,21 @@ +#version 460 + +layout (vertices = 3) out; + +layout (location = 0) in vec4 inAttrib[]; + +layout (location = 0) out vec4 outAttrib[]; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelInner[0] = 5.0; // i.e. 
2 concentric triangles with the center being a triangle + gl_TessLevelOuter[0] = 2.0; + gl_TessLevelOuter[1] = 3.0; + gl_TessLevelOuter[2] = 5.0; + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + outAttrib[gl_InvocationID] = inAttrib[gl_InvocationID]; +} diff --git a/graphics/deko3d/deko_examples/source/tess_simple_tesh.glsl b/graphics/deko3d/deko_examples/source/tess_simple_tesh.glsl new file mode 100644 index 0000000..117fff6 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/tess_simple_tesh.glsl @@ -0,0 +1,21 @@ +#version 460 + +layout (triangles, equal_spacing, ccw) in; + +layout (location = 0) in vec4 inAttrib[]; + +layout (location = 0) out vec4 outAttrib; + +vec4 interpolate(in vec4 v0, in vec4 v1, in vec4 v2) +{ + vec4 a0 = gl_TessCoord.x * v0; + vec4 a1 = gl_TessCoord.y * v1; + vec4 a2 = gl_TessCoord.z * v2; + return a0 + a1 + a2; +} + +void main() +{ + gl_Position = interpolate(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position); + outAttrib = interpolate(inAttrib[0], inAttrib[1], inAttrib[2]); +} diff --git a/graphics/deko3d/deko_examples/source/texture_fsh.glsl b/graphics/deko3d/deko_examples/source/texture_fsh.glsl new file mode 100644 index 0000000..9092000 --- /dev/null +++ b/graphics/deko3d/deko_examples/source/texture_fsh.glsl @@ -0,0 +1,11 @@ +#version 460 + +layout (location = 0) in vec2 inTexCoord; +layout (location = 0) out vec4 outColor; + +layout (binding = 0) uniform sampler2D texture0; + +void main() +{ + outColor = texture(texture0, inTexCoord); +} diff --git a/graphics/deko3d/deko_examples/source/transform_normal_vsh.glsl b/graphics/deko3d/deko_examples/source/transform_normal_vsh.glsl new file mode 100644 index 0000000..0b7ac0f --- /dev/null +++ b/graphics/deko3d/deko_examples/source/transform_normal_vsh.glsl @@ -0,0 +1,28 @@ +#version 460 + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec3 inNormal; +layout (location = 2) in vec4 inAttrib; + +layout (location = 0) 
out vec3 outWorldPos; +layout (location = 1) out vec3 outNormal; +layout (location = 2) out vec4 outAttrib; + +layout (std140, binding = 0) uniform Transformation +{ + mat4 mdlvMtx; + mat4 projMtx; +} u; + +void main() +{ + vec4 worldPos = u.mdlvMtx * vec4(inPos, 1.0); + gl_Position = u.projMtx * worldPos; + + outWorldPos = worldPos.xyz; + + outNormal = normalize(mat3(u.mdlvMtx) * inNormal); + + // Pass through the user-defined attribute + outAttrib = inAttrib; +} diff --git a/graphics/deko3d/deko_examples/source/transform_vsh.glsl b/graphics/deko3d/deko_examples/source/transform_vsh.glsl new file mode 100644 index 0000000..e4310ef --- /dev/null +++ b/graphics/deko3d/deko_examples/source/transform_vsh.glsl @@ -0,0 +1,20 @@ +#version 460 + +layout (location = 0) in vec3 inPos; +layout (location = 1) in vec4 inAttrib; + +layout (location = 0) out vec4 outAttrib; + +layout (std140, binding = 0) uniform Transformation +{ + mat4 mdlvMtx; + mat4 projMtx; +} u; + +void main() +{ + vec4 pos = u.mdlvMtx * vec4(inPos, 1.0); + gl_Position = u.projMtx * pos; + + outAttrib = inAttrib; +}