diff --git a/renderdoc/driver/d3d12/d3d12_dxil_debug.cpp b/renderdoc/driver/d3d12/d3d12_dxil_debug.cpp
index 3b8d383188..47ce955316 100644
--- a/renderdoc/driver/d3d12/d3d12_dxil_debug.cpp
+++ b/renderdoc/driver/d3d12/d3d12_dxil_debug.cpp
@@ -25,3 +25,321 @@
 #pragma once
 
 #include "d3d12_dxil_debug.h"
+#include "d3d12_debug.h"
+#include "d3d12_resources.h"
+
+// Defined in d3d12_shaderdebug.cpp; this patch removes its `static` so it can be used from here.
+extern bool IsShaderParameterVisible(DXBC::ShaderType shaderType,
+                                     D3D12_SHADER_VISIBILITY shaderVisibility);
+
+using namespace DXIL;
+using namespace DXILDebug;
+
+namespace DXILDebug
+{
+// Holds the device and debugger pointers handed to Debugger::ContinueDebug(). It only stores the
+// two pointers at the moment.
+D3D12APIWrapper::D3D12APIWrapper(WrappedID3D12Device *device, Debugger *debugger)
+    : m_Device(device), m_Debugger(debugger)
+{
+}
+
+// Writes a single (non-struct, non-array) variable into the flat list of 16-byte registers
+// `outvars` and appends a SourceVariableMapping describing where each component ended up.
+//
+// cbufferName - name used when building the "<cbuffer>[<register>]" debug variable names
+// byteOffset  - offset of the variable from the start of the constant buffer, in bytes
+// basename    - source-level name recorded in the mapping
+// v           - the variable holding the data to copy
+// outvars     - register array, grown as needed
+// sourcevars  - receives the new mapping
+static void FlattenSingleVariable(const rdcstr &cbufferName, uint32_t byteOffset,
+                                  const rdcstr &basename, const ShaderVariable &v,
+                                  rdcarray &outvars,
+                                  rdcarray &sourcevars)
+{
+  // each register is 16 bytes made of four 4-byte components
+  size_t outIdx = byteOffset / 16;
+  size_t outComp = (byteOffset % 16) / 4;
+
+  if(v.RowMajor())
+    outvars.resize(RDCMAX(outIdx + v.rows, outvars.size()));
+  else
+    outvars.resize(RDCMAX(outIdx + v.columns, outvars.size()));
+
+  if(outvars[outIdx].columns > 0)
+  {
+    // if we already have a variable in this slot, just copy the data for this variable and add the
+    // source mapping.
+    // We should not overlap into the next register as that's not allowed.
+    memcpy(&outvars[outIdx].value.u32v[outComp], &v.value.u32v[0], sizeof(uint32_t) * v.columns);
+
+    SourceVariableMapping mapping;
+    mapping.name = basename;
+    mapping.type = v.type;
+    mapping.rows = v.rows;
+    mapping.columns = v.columns;
+    mapping.offset = byteOffset;
+    mapping.variables.resize(v.columns);
+
+    for(int i = 0; i < v.columns; i++)
+    {
+      mapping.variables[i].type = DebugVariableType::Constant;
+      // outIdx is a size_t, so it needs %zu (as the other branch below already uses)
+      mapping.variables[i].name = StringFormat::Fmt("%s[%zu]", cbufferName.c_str(), outIdx);
+      mapping.variables[i].component = uint16_t(outComp + i);
+    }
+
+    sourcevars.push_back(mapping);
+  }
+  else
+  {
+    // the slot is empty: claim one register per row (row major) or per column (column major)
+    const uint32_t numRegisters = v.RowMajor() ? v.rows : v.columns;
+    for(uint32_t reg = 0; reg < numRegisters; reg++)
+    {
+      outvars[outIdx + reg].rows = 1;
+      outvars[outIdx + reg].type = VarType::Unknown;
+      outvars[outIdx + reg].columns = v.columns;
+      outvars[outIdx + reg].flags = v.flags;
+    }
+
+    if(v.RowMajor())
+    {
+      for(size_t ri = 0; ri < v.rows; ri++)
+        memcpy(&outvars[outIdx + ri].value.u32v[0], &v.value.u32v[ri * v.columns],
+               sizeof(uint32_t) * v.columns);
+    }
+    else
+    {
+      // if we have a matrix stored in column major order, we need to transpose it back so we can
+      // unroll it into vectors.
+      for(size_t ci = 0; ci < v.columns; ci++)
+        for(size_t ri = 0; ri < v.rows; ri++)
+          outvars[outIdx + ci].value.u32v[ri] = v.value.u32v[ri * v.columns + ci];
+    }
+
+    SourceVariableMapping mapping;
+    mapping.name = basename;
+    mapping.type = v.type;
+    mapping.rows = v.rows;
+    mapping.columns = v.columns;
+    mapping.offset = byteOffset;
+    mapping.variables.resize(v.rows * v.columns);
+
+    RDCASSERT(outComp == 0 || v.rows == 1, outComp, v.rows);
+
+    size_t i = 0;
+    for(uint8_t r = 0; r < v.rows; r++)
+    {
+      for(uint8_t c = 0; c < v.columns; c++)
+      {
+        size_t regIndex = outIdx + (v.RowMajor() ? r : c);
+        size_t compIndex = outComp + (v.RowMajor() ? c : r);
+
+        mapping.variables[i].type = DebugVariableType::Constant;
+        mapping.variables[i].name = StringFormat::Fmt("%s[%zu]", cbufferName.c_str(), regIndex);
+        mapping.variables[i].component = uint16_t(compIndex);
+        i++;
+      }
+    }
+
+    sourcevars.push_back(mapping);
+  }
+}
+
+// Walks `constants` and the matching `invars` in parallel, recursing into structs and arrays, and
+// flattens every leaf variable into `outvars` via FlattenSingleVariable().
+//
+// prefix     - source-level name prefix accumulated so far (e.g. "cb.member.")
+// baseOffset - byte offset of the enclosing struct/array element within the constant buffer
+static void FlattenVariables(const rdcstr &cbufferName, const rdcarray &constants,
+                             const rdcarray &invars,
+                             rdcarray &outvars, const rdcstr &prefix,
+                             uint32_t baseOffset, rdcarray &sourceVars)
+{
+  RDCASSERTEQUAL(constants.size(), invars.size());
+
+  for(size_t i = 0; i < constants.size(); i++)
+  {
+    const ShaderConstant &c = constants[i];
+    const ShaderVariable &v = invars[i];
+
+    uint32_t byteOffset = baseOffset + c.byteOffset;
+
+    rdcstr basename = prefix + rdcstr(v.name);
+
+    if(v.type == VarType::Struct)
+    {
+      // check if this is an array of structs or not
+      if(c.type.elements == 1)
+      {
+        FlattenVariables(cbufferName, c.type.members, v.members, outvars, basename + ".",
+                         byteOffset, sourceVars);
+      }
+      else
+      {
+        for(int m = 0; m < v.members.count(); m++)
+        {
+          // m is an int, so it is formatted with %d rather than %zu
+          FlattenVariables(cbufferName, c.type.members, v.members[m].members, outvars,
+                           StringFormat::Fmt("%s[%d].", basename.c_str(), m),
+                           byteOffset + m * c.type.arrayByteStride, sourceVars);
+        }
+      }
+    }
+    else if(c.type.elements > 1 || (v.rows == 0 && v.columns == 0) || !v.members.empty())
+    {
+      // array of non-struct elements: flatten each element at its own stride
+      for(int m = 0; m < v.members.count(); m++)
+      {
+        FlattenSingleVariable(cbufferName, byteOffset + m * c.type.arrayByteStride,
+                              StringFormat::Fmt("%s[%d]", basename.c_str(), m), v.members[m],
+                              outvars, sourceVars);
+      }
+    }
+    else
+    {
+      FlattenSingleVariable(cbufferName, byteOffset, basename, v, outvars, sourceVars);
+    }
+  }
+}
+
+// Finds the reflected constant block bound at `slot`, fills the matching entry of
+// global.constantBlocks from `cbufData` and adds source mappings for the block and its members.
+// Does nothing if no reflected constant block covers the slot.
+static void AddCBufferToGlobalState(const Program &program, GlobalState &global,
+                                    rdcarray &sourceVars,
+                                    const ShaderReflection &refl, const BindingSlot &slot,
+                                    bytebuf &cbufData)
+{
+  // Find the identifier
+  size_t numCBs = refl.constantBlocks.size();
+  for(size_t i = 0; i < numCBs; ++i)
+  {
+    const ConstantBlock &cb = refl.constantBlocks[i];
+    if(slot.registerSpace == (uint32_t)cb.fixedBindSetOrSpace &&
+       slot.shaderRegister >= (uint32_t)cb.fixedBindNumber &&
+       slot.shaderRegister < (uint32_t)(cb.fixedBindNumber + cb.bindArraySize))
+    {
+      uint32_t arrayIndex = slot.shaderRegister - cb.fixedBindNumber;
+
+      // arrays of constant buffers store each element's registers one level further down
+      rdcarray &targetVars =
+          cb.bindArraySize > 1 ? global.constantBlocks[i].members[arrayIndex].members
+                               : global.constantBlocks[i].members;
+      RDCASSERTMSG("Reassigning previously filled cbuffer", targetVars.empty());
+
+      global.constantBlocks[i].name = program.GetResourceReferenceName(ResourceClass::CBuffer, slot);
+
+      SourceVariableMapping cbSourceMapping;
+      cbSourceMapping.name = refl.constantBlocks[i].name;
+      cbSourceMapping.variables.push_back(
+          DebugVariableReference(DebugVariableType::Constant, global.constantBlocks[i].name));
+      sourceVars.push_back(cbSourceMapping);
+
+      rdcstr identifierPrefix = global.constantBlocks[i].name;
+      rdcstr variablePrefix = refl.constantBlocks[i].name;
+      if(cb.bindArraySize > 1)
+      {
+        identifierPrefix =
+            StringFormat::Fmt("%s[%u]", global.constantBlocks[i].name.c_str(), arrayIndex);
+        variablePrefix = StringFormat::Fmt("%s[%u]", refl.constantBlocks[i].name.c_str(), arrayIndex);
+
+        // The above sourceVar is for the logical identifier, and FlattenVariables adds the
+        // individual elements of the constant buffer. For CB arrays, add an extra source
+        // var for the CB array index
+        SourceVariableMapping cbArrayMapping;
+        global.constantBlocks[i].members[arrayIndex].name = StringFormat::Fmt("[%u]", arrayIndex);
+        cbArrayMapping.name = variablePrefix;
+        cbArrayMapping.variables.push_back(
+            DebugVariableReference(DebugVariableType::Constant, identifierPrefix));
+        sourceVars.push_back(cbArrayMapping);
+      }
+      const rdcarray &constants =
+          (cb.bindArraySize > 1) ? refl.constantBlocks[i].variables[0].type.members
+                                 : refl.constantBlocks[i].variables;
+
+      rdcarray vars;
+      StandardFillCBufferVariables(refl.resourceId, constants, vars, cbufData);
+      FlattenVariables(identifierPrefix, constants, vars, targetVars, variablePrefix + ".", 0,
+                       sourceVars);
+      // the flattened registers are named by their index
+      for(size_t c = 0; c < targetVars.size(); c++)
+        targetVars[c].name = StringFormat::Fmt("[%u]", (uint32_t)c);
+
+      return;
+    }
+  }
+}
+
+// Walks the root signature parameters visible to the program's shader stage and, for every root
+// constant, root CBV and CBV descriptor range, reads the bound data and hands it to
+// AddCBufferToGlobalState() so that `global.constantBlocks` and `sourceVars` are filled in.
+void FetchConstantBufferData(WrappedID3D12Device *pDevice, const Program &program,
+                             const D3D12RenderState::RootSignature &rootsig,
+                             const ShaderReflection &refl, GlobalState &global,
+                             rdcarray &sourceVars)
+{
+  WrappedID3D12RootSignature *pD3D12RootSig =
+      pDevice->GetResourceManager()->GetCurrentAs(rootsig.rootsig);
+
+  // without a root signature there is nothing to fetch, and nothing safe to dereference
+  if(pD3D12RootSig == NULL)
+    return;
+
+  size_t numParams = RDCMIN(pD3D12RootSig->sig.Parameters.size(), rootsig.sigelems.size());
+  for(size_t i = 0; i < numParams; i++)
+  {
+    const D3D12RootSignatureParameter &rootSigParam = pD3D12RootSig->sig.Parameters[i];
+    const D3D12RenderState::SignatureElement &element = rootsig.sigelems[i];
+    if(IsShaderParameterVisible(program.GetShaderType(), rootSigParam.ShaderVisibility))
+    {
+      if(rootSigParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS &&
+         element.type == eRootConst)
+      {
+        BindingSlot slot(rootSigParam.Constants.ShaderRegister, rootSigParam.Constants.RegisterSpace);
+        UINT sizeBytes = sizeof(uint32_t) * RDCMIN(rootSigParam.Constants.Num32BitValues,
+                                                   (UINT)element.constants.size());
+        bytebuf cbufData((const byte *)element.constants.data(), sizeBytes);
+        AddCBufferToGlobalState(program, global, sourceVars, refl, slot, cbufData);
+      }
+      else if(rootSigParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_CBV && element.type == eRootCBV)
+      {
+        BindingSlot slot(rootSigParam.Descriptor.ShaderRegister,
+                         rootSigParam.Descriptor.RegisterSpace);
+        ID3D12Resource *cbv = pDevice->GetResourceManager()->GetCurrentAs(element.id);
+        bytebuf cbufData;
+        pDevice->GetDebugManager()->GetBufferData(cbv, element.offset, 0, cbufData);
+        AddCBufferToGlobalState(program, global, sourceVars, refl, slot, cbufData);
+      }
+      else if(rootSigParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE &&
+              element.type == eRootTable)
+      {
+        UINT prevTableOffset = 0;
+        WrappedID3D12DescriptorHeap *heap =
+            pDevice->GetResourceManager()->GetCurrentAs(element.id);
+
+        // the heap is dereferenced for every range below, so skip the table if it wasn't found
+        if(heap == NULL)
+          continue;
+
+        size_t numRanges = rootSigParam.ranges.size();
+        for(size_t r = 0; r < numRanges; r++)
+        {
+          // For this traversal we only care about CBV descriptor ranges, but we still need to
+          // calculate the table offsets in case a descriptor table has a combination of
+          // different range types
+          const D3D12_DESCRIPTOR_RANGE1 &range = rootSigParam.ranges[r];
+
+          UINT offset = range.OffsetInDescriptorsFromTableStart;
+          if(range.OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
+            offset = prevTableOffset;
+
+          D3D12Descriptor *desc = (D3D12Descriptor *)heap->GetCPUDescriptorHandleForHeapStart().ptr;
+          desc += element.offset;
+          desc += offset;
+
+          UINT numDescriptors = range.NumDescriptors;
+          if(numDescriptors == UINT_MAX)
+          {
+            // Find out how many descriptors are left after the start of this range, clamping to
+            // zero so the unsigned subtraction cannot wrap if the range starts past the heap end
+            const UINT heapSize = heap->GetNumDescriptors();
+            const UINT rangeStart = offset + (UINT)element.offset;
+            numDescriptors = heapSize > rangeStart ? heapSize - rangeStart : 0;
+
+            // TODO: Look up the bind point in the D3D12 state to try to get
+            // a better guess at the number of descriptors
+          }
+
+          prevTableOffset = offset + numDescriptors;
+
+          if(range.RangeType != D3D12_DESCRIPTOR_RANGE_TYPE_CBV)
+            continue;
+
+          BindingSlot slot(range.BaseShaderRegister, range.RegisterSpace);
+
+          bytebuf cbufData;
+          // one descriptor per consecutive shader register in the range
+          for(UINT n = 0; n < numDescriptors; ++n, ++slot.shaderRegister)
+          {
+            const D3D12_CONSTANT_BUFFER_VIEW_DESC &cbv = desc->GetCBV();
+            ResourceId resId;
+            uint64_t byteOffset = 0;
+            WrappedID3D12Resource::GetResIDFromAddr(cbv.BufferLocation, resId, byteOffset);
+            ID3D12Resource *pCbvResource =
+                pDevice->GetResourceManager()->GetCurrentAs(resId);
+            cbufData.clear();
+
+            if(cbv.SizeInBytes > 0)
+              pDevice->GetDebugManager()->GetBufferData(pCbvResource, byteOffset, cbv.SizeInBytes,
+                                                        cbufData);
+            AddCBufferToGlobalState(program, global, sourceVars, refl, slot, cbufData);
+
+            desc++;
+          }
+        }
+      }
+    }
+  }
+}
+};
diff --git a/renderdoc/driver/d3d12/d3d12_dxil_debug.h b/renderdoc/driver/d3d12/d3d12_dxil_debug.h
index 630116a6c4..de14010120 100644
--- a/renderdoc/driver/d3d12/d3d12_dxil_debug.h
+++ b/renderdoc/driver/d3d12/d3d12_dxil_debug.h
@@ -23,3 +23,28 @@
  ******************************************************************************/
 
 #pragma once
+
+#include "driver/shaders/dxil/dxil_debug.h"
+#include "d3d12_device.h"
+#include "d3d12_state.h"
+
+namespace DXILDebug
+{
+class Debugger;
+
+void FetchConstantBufferData(WrappedID3D12Device *pDevice, const DXIL::Program &program,
+                             const D3D12RenderState::RootSignature &rootsig,
+                             const ShaderReflection &refl, DXILDebug::GlobalState &global,
+                             rdcarray &sourceVars);
+
+class D3D12APIWrapper
+{
+public:
+  D3D12APIWrapper(WrappedID3D12Device *device, Debugger *debugger);
+  ~D3D12APIWrapper() = default;
+private:
+  const WrappedID3D12Device *m_Device;
+  const Debugger *m_Debugger;
+};
+
+};
diff --git a/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp b/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp
index 440037f2a2..c638ae6613 100644
--- a/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp
+++ b/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp
@@ -30,6 +30,7 @@
 #include "strings/string_utils.h"
 #include "d3d12_command_queue.h"
 #include "d3d12_debug.h"
+#include "d3d12_dxil_debug.h"
 #include "d3d12_replay.h"
 #include "d3d12_resources.h"
 #include "d3d12_shader_cache.h"
@@ -49,8 +50,7 @@ struct DebugHit
   uint32_t rawdata;    // arbitrary, depending on shader
 };
 
-static bool IsShaderParameterVisible(DXBC::ShaderType shaderType,
-                                     D3D12_SHADER_VISIBILITY shaderVisibility)
+bool IsShaderParameterVisible(DXBC::ShaderType shaderType, D3D12_SHADER_VISIBILITY shaderVisibility)
 {
   if(shaderVisibility == D3D12_SHADER_VISIBILITY_ALL)
     return true;
@@ -1858,6 +1858,8 @@ ShaderDebugTrace *D3D12Replay::DebugVertex(uint32_t eventId, uint32_t vertid, ui
   else
   {
     RDCERR("TODO ADD DXIL VERTEX SHADER DEBUGGER SUPPORT");
+    DXILDebug::Debugger *debugger = new DXILDebug::Debugger();
+    ret = debugger->BeginDebug(eventId, dxbc, refl, 0);
   }
 
   return ret;
@@ -2698,6 +2700,9 @@ void ExtractInputsPS(PSInput IN,
   else
   {
     RDCERR("TODO ADD DXIL PIXEL SHADER DEBUGGER SUPPORT");
+    DXILDebug::Debugger *debugger = new DXILDebug::Debugger();
+    uint32_t activeLaneIdx = 0;
+    ret = debugger->BeginDebug(eventId, dxbc, refl, activeLaneIdx);
   }
 
   return ret;
@@ -2831,7 +2836,46 @@ ShaderDebugTrace *D3D12Replay::DebugThread(uint32_t eventId,
   }
   else
   {
-    RDCERR("TODO ADD DXIL COMPUTE SHADER DEBUGGER SUPPORT");
+    // get ourselves in pristine state before this dispatch (without any side effects it may have had)
+    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
+
+    DXILDebug::Debugger *debugger = new DXILDebug::Debugger();
+    ret = debugger->BeginDebug(eventId, dxbc, refl, 0);
+    DXILDebug::GlobalState &globalState = debugger->GetGlobalState();
+
+    std::map &builtins = globalState.builtinInputs;
+
+    uint32_t threadDim[3] = {
+        refl.dispatchThreadsDimension[0],
+        refl.dispatchThreadsDimension[1],
+        refl.dispatchThreadsDimension[2],
+    };
+
+    // SV_DispatchThreadID
+    builtins[ShaderBuiltin::DispatchThreadIndex] = ShaderVariable(
+        rdcstr(), groupid[0] * threadDim[0] + threadid[0], groupid[1] * threadDim[1] + threadid[1],
+        groupid[2] * threadDim[2] + threadid[2], 0U);
+
+    // SV_GroupID
+    builtins[ShaderBuiltin::GroupIndex] =
+        ShaderVariable(rdcstr(), groupid[0], groupid[1], groupid[2], 0U);
+
+    // SV_GroupThreadID
+    builtins[ShaderBuiltin::GroupThreadIndex] =
+        ShaderVariable(rdcstr(), threadid[0], threadid[1], threadid[2], 0U);
+
+    // SV_GroupIndex
+    builtins[ShaderBuiltin::GroupFlatIndex] = ShaderVariable(
+        rdcstr(),
+        threadid[2] * threadDim[0] * threadDim[1] + threadid[1] * threadDim[0] + threadid[0], 0U,
+        0U, 0U);
+
+    // TODO ADD ANY OTHER INPUTS
+
+    // Fetch constant buffer data from root signature
+    DXILDebug::FetchConstantBufferData(m_pDevice, *dxbc->GetDXILByteCode(), rs.compute, refl,
+                                       globalState, ret->sourceVars);
+    ret->constantBlocks = globalState.constantBlocks;
   }
 
   return ret;
@@ -2844,7 +2888,10 @@ rdcarray D3D12Replay::ContinueDebug(ShaderDebugger *debugger)
 
   if(((DXBCContainerDebugger *)debugger)->isDXIL)
   {
-    return {};
+    DXILDebug::Debugger *dxilDebugger = (DXILDebug::Debugger *)debugger;
+    DXILDebug::D3D12APIWrapper apiWrapper(m_pDevice, dxilDebugger);
+    D3D12MarkerRegion region(m_pDevice->GetQueue()->GetReal(), "ContinueDebug Simulation Loop");
+    return dxilDebugger->ContinueDebug(&apiWrapper);
   }
   else
   {
diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.cpp b/renderdoc/driver/shaders/dxil/dxil_debug.cpp
index b5a3b7bdab..d6d920614b 100644
--- a/renderdoc/driver/shaders/dxil/dxil_debug.cpp
+++ b/renderdoc/driver/shaders/dxil/dxil_debug.cpp
@@ -25,7 +25,722 @@
 #pragma once
 
 #include "dxil_debug.h"
+#include "common/formatting.h"
+#include "replay/replay_driver.h"
+
+using namespace DXIL;
+
+// Maps a scalar DXIL type to the matching VarType using its scalar kind and bit width.
+// Integers always map to the signed variants (1-bit integers map to Bool); any kind or width not
+// listed below yields VarType::Unknown.
+VarType ConvertDXILTypeToVarType(const Type *type)
+{
+  // TODO: Handle Vectors, Arrays, Structs
+  RDCASSERT(type->type == DXIL::Type::TypeKind::Scalar);
+  if(type->scalarType == Type::ScalarKind::Int)
+  {
+    if(type->bitWidth == 64)
+      return VarType::SLong;
+    else if(type->bitWidth == 32)
+      return VarType::SInt;
+    else if(type->bitWidth == 16)
+      return VarType::SShort;
+    else if(type->bitWidth == 8)
+      return VarType::SByte;
+    else if(type->bitWidth == 1)
+      return VarType::Bool;
+  }
+  else if(type->scalarType == Type::ScalarKind::Float)
+  {
+    if(type->bitWidth == 64)
+      return VarType::Double;
+    else if(type->bitWidth == 32)
+      return VarType::Float;
+    else if(type->bitWidth == 16)
+      return VarType::Half;
+  }
+  return VarType::Unknown;
+}
 
 namespace DXILDebug
 {
+// Creates a thread that is not yet inside any function: Finished() reports true until
+// EnterEntryPoint()/EnterFunction() pushes a stack frame.
+ThreadState::ThreadState(uint32_t workgroupIndex, Debugger &debugger, const GlobalState &globalState)
+    : m_Debugger(debugger), m_GlobalState(globalState), m_Program(debugger.GetProgram())
+{
+  m_WorkgroupIndex = workgroupIndex;
+  m_Function = NULL;
+  m_FunctionInstructionIdx = 0;
+  m_GlobalInstructionIdx = 0;
+  m_Killed = false;
+  m_Ended = false;
+  m_Callstack.clear();
+}
+
+// A thread is finished once it was killed, executed a Ret, or has no stack frame.
+bool ThreadState::Finished() const
+{
+  return m_Killed || m_Ended || m_Callstack.empty();
+}
+
+// Pushes a new stack frame for `function` and positions the thread on its first instruction.
+// `args` is currently unused.
+void ThreadState::EnterFunction(const Function *function, const rdcarray &args)
+{
+  StackFrame *frame = new StackFrame(function);
+  m_Function = function;
+  m_FunctionInstructionIdx = 0;
+
+  // if there's a previous stack frame, save its live list
+  if(!m_Callstack.empty())
+  {
+    /*
+    // process the outgoing scope
+    ProcessScopeChange(live, {});
+    callstack.back()->live = live;
+    */
+  }
+
+  // start with just globals
+  // live = debugger.GetLiveGlobals();
+
+  m_GlobalInstructionIdx = m_Debugger.GetGlobalInstructionIdx(function, m_FunctionInstructionIdx);
+  m_Callstack.push_back(frame);
+}
+
+// Enters the shader entry point. `state` is only attached for the duration of the call and may be
+// NULL for lanes whose state is not being recorded.
+void ThreadState::EnterEntryPoint(const Function *function, ShaderDebugState *state)
+{
+  m_State = state;
+
+  EnterFunction(function, {});
+
+  m_State = NULL;
+}
+
+// Executes the instruction at m_FunctionInstructionIdx of the current function.
+// Returns false only for calls to llvm.dbg.* functions, which tells StepNext() to carry on with
+// the following instruction; returns true in every other case.
+// Any produced value is stored in m_Variables/m_Live and, when a ShaderDebugState is attached and
+// the value should be reported, appended to its change list.
+bool ThreadState::ExecuteInstruction(const rdcarray &workgroups)
+{
+  // nothing to run: treat a missing function or running off its end as the end of the thread
+  // rather than reading out of bounds
+  if(m_Function == NULL || m_FunctionInstructionIdx >= m_Function->instructions.size())
+  {
+    m_Ended = true;
+    return true;
+  }
+
+  const Instruction &inst = *m_Function->instructions[m_FunctionInstructionIdx];
+  Operation opCode = inst.op;
+
+  ShaderVariable result;
+  Id resultId;
+  Program::MakeResultId(inst, resultId);
+  const Type *retType = inst.type;
+  bool recordChange = true;
+
+  switch(opCode)
+  {
+    case Operation::Ret: m_Ended = true; break;
+    case Operation::Call:
+    {
+      rdcstr funcCallName = inst.getFuncCall()->name;
+      if(funcCallName.beginsWith("dx.op."))
+      {
+        DXOp dxOpCode = DXOp::NumOpCodes;
+        // decode the opcode outside of RDCASSERT so it still happens if assertions are compiled
+        // out; on failure dxOpCode stays NumOpCodes and is reported by the default case below
+        if(!getival(inst.args[0], dxOpCode))
+          RDCERR("dx.op call `%s` does not have a constant opcode argument", funcCallName.c_str());
+        switch(dxOpCode)
+        {
+          // not implemented yet: log the opcode and produce no result
+          case DXOp::BufferLoad:
+          case DXOp::BufferStore:
+          case DXOp::Sin:
+          case DXOp::Cos:
+          case DXOp::Tan:
+          case DXOp::Asin:
+          case DXOp::Acos:
+          case DXOp::Atan:
+          case DXOp::Hsin:
+          case DXOp::Hcos:
+          case DXOp::Htan:
+          case DXOp::Round_pi:
+          case DXOp::FMax:
+          case DXOp::FMin:
+          {
+            RDCLOG("%s", ToStr(dxOpCode).c_str());
+            resultId.clear();
+            break;
+          }
+          case DXOp::CreateHandle:
+          {
+            const ResourceReference *resRef = m_Program.GetResourceReference(resultId);
+            if(resRef)
+            {
+              const rdcarray *list = NULL;
+              // a static known handle which should be in the global resources container
+              switch(resRef->resourceBase.resClass)
+              {
+                case ResourceClass::CBuffer: list = &m_GlobalState.constantBlocks; break;
+                case ResourceClass::SRV: list = &m_GlobalState.readOnlyResources; break;
+                case ResourceClass::UAV: list = &m_GlobalState.readWriteResources; break;
+                case ResourceClass::Sampler: list = &m_GlobalState.samplers; break;
+                default:
+                  RDCERR("Invalid ResourceClass %u", (uint32_t)resRef->resourceBase.resClass);
+                  break;
+              };
+              RDCASSERT(list);
+
+              // list stays NULL for an invalid resource class, so don't dereference it blindly
+              if(list)
+              {
+                const rdcarray &resources = *list;
+                for(uint32_t i = 0; i < resources.size(); ++i)
+                {
+                  if(resources[i].name == resultId)
+                  {
+                    result = resources[i];
+                    break;
+                  }
+                }
+              }
+              // the handle is stored as a variable but not reported as a change
+              recordChange = false;
+              RDCASSERT(!result.name.empty());
+            }
+            else
+            {
+              // TODO: support for dynamic handles i.e. array lookups
+              RDCERR("Unhandled dynamic handle");
+              /*
+              DescriptorCategory category;
+              uint32_t index;
+              uint32_t arrayElement = 0;
+              // Need to make a shader variable for the return : it needs to have a
+              binding point result.SetBindIndex(ShaderBindIndex(category, index, arrayElement));
+              */
+            }
+            break;
+          }
+          case DXOp::CBufferLoadLegacy:
+          {
+            // Need to find the resource
+            Id handleId = m_Program.GetArgId(inst, 1);
+            const ResourceReference *resRef = m_Program.GetResourceReference(handleId);
+            if(resRef)
+            {
+              uint32_t regIndex = 0;
+              if(!getival(inst.args[2], regIndex))
+              {
+                // TODO: find the shader variable for argument 2 then get its value
+                RDCERR("Unhandled dynamic regIndex");
+                // there is no usable register index, so don't read the constant buffer
+                break;
+              }
+
+              // look the handle up without inserting, and make sure the register exists before
+              // reading it
+              auto handleIt = m_Variables.find(handleId);
+              if(handleIt == m_Variables.end() || regIndex >= handleIt->second.members.size())
+              {
+                RDCERR("cbuffer handle '%s' has no register %u", handleId.c_str(), regIndex);
+                break;
+              }
+
+              // DXIL will create a vector of a single type
+              // The vector element type will change to match what value will be extracted
+              // ie. float, double, int, short
+              // The number of elements is the number of elements that fit into 16-bytes
+              // i.e. 16 / sizeof(Element)
+              // DXIL reports this vector as a struct of N members of Element type.
+              result.name = resultId;
+              result.rows = 1;
+              result.value = handleIt->second.members[regIndex].value;
+
+              RDCASSERT(retType->type == DXIL::Type::TypeKind::Struct);
+              const Type *baseType = retType->members[0];
+              RDCASSERT(baseType->type == DXIL::Type::TypeKind::Scalar);
+              result.type = ConvertDXILTypeToVarType(baseType);
+              switch(result.type)
+              {
+                case VarType::SLong:
+                case VarType::ULong:
+                case VarType::Double: result.columns = 2; break;
+                case VarType::SInt:
+                case VarType::UInt:
+                case VarType::Float: result.columns = 4; break;
+                case VarType::SShort:
+                case VarType::UShort:
+                case VarType::Half: result.columns = 8; break;
+                case VarType::SByte:
+                case VarType::UByte: result.columns = 16; break;
+                case VarType::Bool:
+                case VarType::Enum:
+                case VarType::Struct:
+                case VarType::GPUPointer:
+                case VarType::ConstantBlock:
+                case VarType::ReadOnlyResource:
+                case VarType::ReadWriteResource:
+                case VarType::Sampler:
+                case VarType::Unknown:
+                  RDCERR("Unhandled VarType %s", ToStr(result.type).c_str());
+                  break;
+              };
+            }
+            else
+            {
+              RDCERR("Unknown cbuffer handle '%s'", handleId.c_str());
+            }
+            break;
+          }
+          default:
+            RDCERR("Unhandled dx.op method `%s` %s", funcCallName.c_str(), ToStr(dxOpCode).c_str());
+            break;
+        };
+      }
+      else if(funcCallName.beginsWith("llvm.dbg."))
+      {
+        RDCLOG("Unhandled llvm.dbg method `%s`", funcCallName.c_str());
+        return false;
+      }
+      else
+      {
+        RDCERR("Unhandled call to function `%s`", funcCallName.c_str());
+        break;
+      }
+      break;
+    }
+    case Operation::ExtractVal:
+    {
+      // TODO: need helper function to convert DXIL::Type* -> ShaderVariable
+      Id src = m_Program.GetArgId(inst, 0);
+      const ShaderVariable &srcVal = m_Variables[src];
+      RDCASSERT(srcVal.members.empty());
+      // TODO: handle greater than one index
+      RDCASSERT(inst.args.size() == 2);
+      // ~0ULL so the sentinel is all ones even where unsigned long is 32 bits wide
+      uint64_t idx = ~0ULL;
+      // decoded outside of RDCASSERT so it still happens if assertions are compiled out, and
+      // range checked for real because idx indexes srcVal.value below
+      const bool constIdx = getival(inst.args[1], idx);
+      if(!constIdx || idx >= srcVal.columns)
+      {
+        RDCERR("ExtractVal index is not a constant within the %u columns of the source",
+               (uint32_t)srcVal.columns);
+        break;
+      }
+
+      // TODO: check the srcVal.type matches the result type
+      result.name = resultId;
+      result.rows = 1;
+      result.columns = 1;
+      result.type = ConvertDXILTypeToVarType(retType);
+      RDCASSERTEQUAL(result.type, srcVal.type);
+      switch(result.type)
+      {
+        case VarType::Double: result.value.f64v[0] = srcVal.value.f64v[idx]; break;
+        case VarType::Float: result.value.f32v[0] = srcVal.value.f32v[idx]; break;
+        case VarType::Half: result.value.f16v[0] = srcVal.value.f16v[idx]; break;
+        case VarType::SLong: result.value.s64v[0] = srcVal.value.s64v[idx]; break;
+        case VarType::SInt: result.value.s32v[0] = srcVal.value.s32v[idx]; break;
+        case VarType::SShort: result.value.s16v[0] = srcVal.value.s16v[idx]; break;
+        case VarType::SByte: result.value.s8v[0] = srcVal.value.s8v[idx]; break;
+        default: RDCERR("Unhandle VarType %s", ToStr(result.type).c_str()); break;
+      };
+      break;
+    }
+    // not implemented yet: log the operation and produce no result
+    case Operation::UToF:
+    case Operation::Add:
+    case Operation::Mul:
+    case Operation::Bitcast:
+    case Operation::INotEqual:
+    case Operation::FPTrunc:
+    case Operation::FToU:
+    case Operation::Select:
+    {
+      RDCLOG("%s", ToStr(opCode).c_str());
+      resultId.clear();
+      break;
+    }
+    default: RDCERR("Unhandled DXIL opcode %s", ToStr(opCode).c_str()); break;
+  };
+
+  // either the instruction produced a named result, or it produced nothing at all
+  RDCASSERT(!(result.name.empty() ^ resultId.empty()));
+  if(!result.name.empty() && !resultId.empty())
+  {
+    RDCASSERT(!m_Live.contains(resultId));
+    m_Live.push_back(resultId);
+    RDCASSERT(m_Variables.count(resultId) == 0);
+    m_Variables[resultId] = result;
+
+    // m_State is NULL for lanes that are stepped without recording their state
+    if(recordChange && m_State)
+    {
+      ShaderVariableChange change;
+      change.after = result;
+      m_State->changes.push_back(change);
+    }
+  }
+  return true;
+}
+
+// Runs the thread forward by one reported step: executes instructions starting at the current
+// position until ExecuteInstruction() returns true, then leaves the thread positioned on the
+// instruction after it. `state` may be NULL, in which case nothing is recorded.
+void ThreadState::StepNext(ShaderDebugState *state, const rdcarray &workgroups)
+{
+  // a thread that never entered a function has nothing to step
+  if(m_Function == NULL)
+  {
+    m_Ended = true;
+    return;
+  }
+
+  m_State = state;
+
+  bool executed = false;
+  do
+  {
+    // m_FunctionInstructionIdx is the instruction that has not run yet. Execute it first and
+    // advance afterwards, otherwise instruction 0 of the entry point would be skipped.
+    m_GlobalInstructionIdx = m_Debugger.GetGlobalInstructionIdx(m_Function, m_FunctionInstructionIdx);
+
+    if(m_State)
+    {
+      m_State->flags = ShaderEvents::NoEvent;
+      m_State->changes.clear();
+    }
+
+    executed = ExecuteInstruction(workgroups);
+    m_FunctionInstructionIdx++;
+  } while(!executed);
+
+  m_GlobalInstructionIdx = m_Debugger.GetGlobalInstructionIdx(m_Function, m_FunctionInstructionIdx);
+
+  if(m_State)
+    m_State->nextInstruction = m_GlobalInstructionIdx;
+
+  // TODO: CREATE THIS DATA
+
+  m_State = NULL;
+}
+
+// Converts an instruction index within `function` into an index across all non-external
+// functions, using the offsets computed in BeginDebug().
+uint32_t Debugger::GetGlobalInstructionIdx(const Function *function, uint32_t functionInstIdx)
+{
+  auto it = m_GlobalInstructionOffsets.find(function);
+  if(it == m_GlobalInstructionOffsets.end())
+  {
+    // unknown (or NULL) function: report it instead of dereferencing the end iterator
+    RDCERR("Function has no global instruction offset");
+    return functionInstIdx;
+  }
+  return it->second + functionInstIdx;
+}
+
+// Fills `activeMask` with one entry per thread in the workgroup: true while the thread has not
+// finished.
+void Debugger::CalcActiveMask(rdcarray &activeMask)
+{
+  // one bool per workgroup thread
+  activeMask.resize(m_Workgroups.size());
+
+  // mark any threads that have finished as inactive, otherwise they're active
+  for(size_t i = 0; i < m_Workgroups.size(); i++)
+    activeMask[i] = !m_Workgroups[i].Finished();
+
+  // only pixel shaders automatically converge workgroups, compute shaders need explicit sync
+  if(m_Stage != ShaderStage::Pixel)
+    return;
+
+  // TODO: implement pixel shader convergence
+  return;
+}
+
+// Prepares the debugger for a new trace: creates the threads of the workgroup, lays out the
+// global constant block / resource / sampler variables from `reflection`, locates the entry point
+// and computes the global instruction offset of every non-external function.
+// Returns a newly allocated trace whose `debugger` member points back at this object.
+ShaderDebugTrace *Debugger::BeginDebug(uint32_t eventId, const DXBC::DXBCContainer *dxbcContainer,
+                                       const ShaderReflection &reflection, uint32_t activeLaneIndex)
+{
+  ShaderStage shaderStage = reflection.stage;
+
+  m_DXBC = dxbcContainer;
+  m_Program = m_DXBC->GetDXILByteCode();
+  m_EventId = eventId;
+  m_ActiveLaneIndex = activeLaneIndex;
+  m_Steps = 0;
+  // CalcActiveMask() reads m_Stage and nothing else visible in this file assigns it
+  m_Stage = shaderStage;
+
+  ShaderDebugTrace *ret = new ShaderDebugTrace;
+  ret->stage = shaderStage;
+
+  // pixel shaders are simulated as four threads, every other stage as a single thread
+  uint32_t workgroupSize = shaderStage == ShaderStage::Pixel ? 4 : 1;
+  for(uint32_t i = 0; i < workgroupSize; i++)
+    m_Workgroups.push_back(ThreadState(i, *this, m_GlobalState));
+
+  // TODO: NEED TO POPULATE GROUPSHARED DATA
+
+  // Create the storage layout for the constant buffers
+  // The constant buffer data and details are filled in outside of this method
+  size_t count = reflection.constantBlocks.size();
+  m_GlobalState.constantBlocks.resize(count);
+  for(uint32_t i = 0; i < count; i++)
+  {
+    const ConstantBlock &cbuffer = reflection.constantBlocks[i];
+    uint32_t bindCount = cbuffer.bindArraySize;
+    if(bindCount > 1)
+    {
+      // Create nested structure for constant buffer array
+      m_GlobalState.constantBlocks[i].members.resize(bindCount);
+    }
+  }
+
+  struct ResourceList
+  {
+    VarType varType;
+    DebugVariableType debugVarType;
+    DescriptorCategory category;
+    ResourceClass resourceClass;
+    const rdcarray &resources;
+    rdcarray &dst;
+  };
+
+  // TODO: need to handle SRVs, UAVs, Samplers which are arrays
+
+  // Create the variables for SRVs and UAVs
+  ResourceList lists[] = {
+      {
+          VarType::ReadOnlyResource,
+          DebugVariableType::ReadOnlyResource,
+          DescriptorCategory::ReadOnlyResource,
+          ResourceClass::SRV,
+          reflection.readOnlyResources,
+          m_GlobalState.readOnlyResources,
+      },
+      {
+          VarType::ReadWriteResource,
+          DebugVariableType::ReadWriteResource,
+          DescriptorCategory::ReadWriteResource,
+          ResourceClass::UAV,
+          reflection.readWriteResources,
+          m_GlobalState.readWriteResources,
+      },
+  };
+
+  for(ResourceList &list : lists)
+  {
+    list.dst.reserve(list.resources.size());
+    for(uint32_t i = 0; i < list.resources.size(); i++)
+    {
+      const ShaderResource &res = list.resources[i];
+
+      // Fetch the resource name
+      BindingSlot slot(res.fixedBindNumber, res.fixedBindSetOrSpace);
+      rdcstr name = m_Program->GetResourceReferenceName(list.resourceClass, slot);
+
+      ShaderVariable shaderVar(name, 0U, 0U, 0U, 0U);
+      shaderVar.rows = 1;
+      shaderVar.columns = 1;
+      shaderVar.SetBindIndex(ShaderBindIndex(list.category, i, 0));
+      shaderVar.type = list.varType;
+      list.dst.push_back(shaderVar);
+
+      SourceVariableMapping sourceVar;
+      sourceVar.name = res.name;
+      sourceVar.type = list.varType;
+      sourceVar.rows = 1;
+      sourceVar.columns = 1;
+      sourceVar.offset = 0;
+
+      DebugVariableReference ref;
+      ref.type = list.debugVarType;
+      ref.name = shaderVar.name;
+      sourceVar.variables.push_back(ref);
+
+      ret->sourceVars.push_back(sourceVar);
+    }
+  }
+
+  // Create the variables for Samplers
+  count = reflection.samplers.size();
+  // reserve rather than resize: the entries are appended with push_back below, so resizing first
+  // would leave `count` empty variables in front of the real ones
+  m_GlobalState.samplers.reserve(count);
+  for(uint32_t i = 0; i < count; i++)
+  {
+    const ShaderSampler &sampler = reflection.samplers[i];
+    // Fetch the Sampler name. BindingSlot takes the register first and the space second, as in
+    // the SRV/UAV loop above
+    BindingSlot slot(sampler.fixedBindNumber, sampler.fixedBindSetOrSpace);
+    rdcstr name = m_Program->GetResourceReferenceName(ResourceClass::Sampler, slot);
+
+    ShaderVariable shaderVar(name, 0U, 0U, 0U, 0U);
+    shaderVar.rows = 1;
+    shaderVar.columns = 1;
+    shaderVar.SetBindIndex(ShaderBindIndex(DescriptorCategory::Sampler, i, 0));
+    shaderVar.type = VarType::Sampler;
+    m_GlobalState.samplers.push_back(shaderVar);
+
+    SourceVariableMapping sourceVar;
+    sourceVar.name = sampler.name;
+    sourceVar.type = VarType::Sampler;
+    sourceVar.rows = 1;
+    sourceVar.columns = 1;
+    sourceVar.offset = 0;
+
+    DebugVariableReference ref;
+    ref.type = DebugVariableType::Sampler;
+    ref.name = shaderVar.name;
+    sourceVar.variables.push_back(ref);
+
+    // the mapping was previously built and then dropped; publish it like the SRV/UAV ones
+    ret->sourceVars.push_back(sourceVar);
+  }
+
+  rdcstr entryPoint = reflection.entryPoint;
+  rdcstr entryFunction = m_Program->GetEntryFunction();
+  RDCASSERTEQUAL(entryPoint, entryFunction);
+
+  // the entry point is the first non-external function with the entry function's name
+  m_EntryPointFunction = NULL;
+  for(const Function *f : m_Program->m_Functions)
+  {
+    if(!f->external && (f->name == entryFunction))
+    {
+      m_EntryPointFunction = f;
+      break;
+    }
+  }
+  RDCASSERT(m_EntryPointFunction);
+
+  // number the instructions of all non-external functions consecutively
+  m_GlobalInstructionOffsets.clear();
+  uint32_t globalOffset = 0;
+  for(const Function *f : m_Program->m_Functions)
+  {
+    if(!f->external)
+    {
+      m_GlobalInstructionOffsets[f] = globalOffset;
+      globalOffset += (uint32_t)f->instructions.size();
+    }
+  }
+
+  dxbcContainer->FillTraceLineInfo(*ret);
+
+  // TODO: Inputs
+  // ret.inputs = rdcarray inputs;
+  // ret.sourceVars : should it contain the Inputs
+  // TODO: Outputs
+  // ret.sourceVars : should it contain the Outputs
+  ret->constantBlocks = m_GlobalState.constantBlocks;
+  ret->readOnlyResources = m_GlobalState.readOnlyResources;
+  ret->readWriteResources = m_GlobalState.readWriteResources;
+  ret->samplers = m_GlobalState.samplers;
+  ret->debugger = this;
+
+  return ret;
+}
+
+// Advances the simulation and returns the states recorded for the active lane. The first call
+// also enters the entry point on every lane and returns the initial state. Each call records at
+// most 100 further steps of the active lane; once the active lane has finished nothing more is
+// appended.
+rdcarray Debugger::ContinueDebug(D3D12APIWrapper *apiWrapper)
+{
+  ThreadState &active = GetActiveLane();
+
+  rdcarray ret;
+
+  // initialise the first ShaderDebugState if we haven't stepped yet
+  if(m_Steps == 0)
+  {
+    ShaderDebugState initial;
+
+    // we should be sitting at the entry point function prologue, step forward into the first
+    // block and past any function-local variable declarations
+    for(size_t lane = 0; lane < m_Workgroups.size(); lane++)
+    {
+      ThreadState &thread = m_Workgroups[lane];
+
+      if(lane == m_ActiveLaneIndex)
+      {
+        thread.EnterEntryPoint(m_EntryPointFunction, &initial);
+        // FillCallstack(thread, initial);
+        initial.nextInstruction = thread.m_GlobalInstructionIdx;
+      }
+      else
+      {
+        // the other lanes run the same entry point, they just don't record any state
+        thread.EnterEntryPoint(m_EntryPointFunction, NULL);
+      }
+    }
+
+    ret.push_back(std::move(initial));
+
+    m_Steps++;
+  }
+
+  // if we've finished, return an empty set to signify that
+  if(active.Finished())
+    return ret;
+
+  rdcarray activeMask;
+
+  for(int stepEnd = m_Steps + 100; m_Steps < stepEnd;)
+  {
+    if(active.Finished())
+      break;
+
+    // calculate the current mask of which threads are active
+    CalcActiveMask(activeMask);
+
+    // step all active members of the workgroup
+    for(size_t lane = 0; lane < m_Workgroups.size(); lane++)
+    {
+      if(activeMask[lane])
+      {
+        ThreadState &thread = m_Workgroups[lane];
+        if(thread.Finished())
+        {
+          if(lane == m_ActiveLaneIndex)
+            ret.emplace_back();
+          continue;
+        }
+
+        if(lane == m_ActiveLaneIndex)
+        {
+          ShaderDebugState state;
+
+          state.stepIndex = m_Steps;
+          thread.StepNext(&state, m_Workgroups);
+
+          ret.push_back(std::move(state));
+
+          m_Steps++;
+        }
+        else
+        {
+          thread.StepNext(NULL, m_Workgroups);
+        }
+      }
+    }
+  }
+  return ret;
+}
 };    // namespace DXILDebug
diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.h b/renderdoc/driver/shaders/dxil/dxil_debug.h
index 60c1a34c53..6d4f3f6d94 100644
--- a/renderdoc/driver/shaders/dxil/dxil_debug.h
+++ b/renderdoc/driver/shaders/dxil/dxil_debug.h
@@ -30,9 +30,98 @@
 
 namespace DXILDebug
 {
-struct Debugger : public DXBCContainerDebugger
+typedef rdcstr Id;
+class Debugger;
+class D3D12APIWrapper;
+
+typedef std::map BuiltinInputs;
+
+struct GlobalState
 {
+  GlobalState() = default;
+  BuiltinInputs builtinInputs;
+
+  // allocated storage for opaque uniform blocks, does not change over the course of debugging
+  rdcarray constantBlocks;
+
+  // workgroup private variables
+  rdcarray workgroups;
+
+  // resources may be read-write but the variable itself doesn't change
+  rdcarray readOnlyResources;
+  rdcarray readWriteResources;
+  rdcarray samplers;
+};
+
+struct StackFrame
+{
+  StackFrame(const DXIL::Function *func) : function(func) {}
+  const DXIL::Function *function;
+};
+
+struct ThreadState
+{
+  ThreadState(uint32_t workgroupIndex, Debugger &debugger, const GlobalState &globalState);
+  ~ThreadState() = default;
+
+  void EnterFunction(const DXIL::Function *function, const rdcarray &args);
+  void EnterEntryPoint(const DXIL::Function *function, ShaderDebugState *state);
+  void StepNext(ShaderDebugState *state, const rdcarray &workgroups);
+
+  bool Finished() const;
+  bool ExecuteInstruction(const rdcarray &workgroups);
+
+  Debugger &m_Debugger;
+  const DXIL::Program &m_Program;
+  const GlobalState &m_GlobalState;
+
+  rdcarray m_Callstack;
+  ShaderDebugState *m_State = NULL;
+
+  // Known SSA ShaderVariables
+  std::map m_Variables;
+  // Live variables at the current scope
+  rdcarray m_Live;
+
+  const DXIL::Function *m_Function;
+  // The instruction index with the currenct function
uint32_t m_FunctionInstructionIdx; + // A global logical instruction index (bit like a PC) not the instruction index within a function + uint32_t m_GlobalInstructionIdx; + + // index in the pixel quad + uint32_t m_WorkgroupIndex; + bool m_Killed; + bool m_Ended; +}; + +class Debugger : public DXBCContainerDebugger +{ +public: Debugger() : DXBCContainerDebugger(true){}; + ShaderDebugTrace *BeginDebug(uint32_t eventId, const DXBC::DXBCContainer *dxbcContainer, + const ShaderReflection &reflection, uint32_t activeLaneIndex); + rdcarray ContinueDebug(D3D12APIWrapper *apiWrapper); + GlobalState &GetGlobalState() { return m_GlobalState; } + uint32_t GetGlobalInstructionIdx(const DXIL::Function *function, uint32_t functionInstIdx); + const DXIL::Program &GetProgram() { return *m_Program; } + +private: + void CalcActiveMask(rdcarray &activeMask); + ThreadState &GetActiveLane() { return m_Workgroups[m_ActiveLaneIndex]; } + + rdcarray m_Workgroups; + std::map m_GlobalInstructionOffsets; + + GlobalState m_GlobalState; + const DXBC::DXBCContainer *m_DXBC = NULL; + const DXIL::Program *m_Program = NULL; + const DXIL::Function *m_EntryPointFunction = NULL; + ShaderStage m_Stage; + + uint32_t m_EventId = 0; + uint32_t m_ActiveLaneIndex = 0; + int m_Steps = 0; }; }; // namespace DXILDebug