diff options
Diffstat (limited to 'external/optick/optick_gpu.d3d12.cpp')
-rw-r--r-- | external/optick/optick_gpu.d3d12.cpp | 382 |
1 files changed, 0 insertions, 382 deletions
diff --git a/external/optick/optick_gpu.d3d12.cpp b/external/optick/optick_gpu.d3d12.cpp deleted file mode 100644 index 1ee4dd9..0000000 --- a/external/optick/optick_gpu.d3d12.cpp +++ /dev/null @@ -1,382 +0,0 @@ -#include "optick.config.h" -#if USE_OPTICK -#if OPTICK_ENABLE_GPU_D3D12 - -#include "optick_common.h" -#include "optick_memory.h" -#include "optick_core.h" -#include "optick_gpu.h" - -#include <atomic> -#include <thread> - -#include <d3d12.h> -#include <dxgi.h> -#include <dxgi1_4.h> - - -#define OPTICK_CHECK(args) do { HRESULT __hr = args; (void)__hr; OPTICK_ASSERT(__hr == S_OK, "Failed check"); } while(false); - -namespace Optick -{ - class GPUProfilerD3D12 : public GPUProfiler - { - struct Frame - { - ID3D12CommandAllocator* commandAllocator; - ID3D12GraphicsCommandList* commandList; - - Frame() : commandAllocator(nullptr), commandList(nullptr) - { - Reset(); - } - - void Reset() - { - } - - void Shutdown(); - - ~Frame() - { - Shutdown(); - } - }; - - struct NodePayload - { - ID3D12CommandQueue* commandQueue; - ID3D12QueryHeap* queryHeap; - ID3D12Fence* syncFence; - array<Frame, NUM_FRAMES_DELAY> frames; - - NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {} - ~NodePayload(); - }; - vector<NodePayload*> nodePayloads; - - ID3D12Resource* queryBuffer; - ID3D12Device* device; - - // VSync Stats - DXGI_FRAME_STATISTICS prevFrameStatistics; - - //void UpdateRange(uint32_t start, uint32_t finish) - void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue); - - void ResolveTimestamps(uint32_t startIndex, uint32_t count); - - void WaitForFrame(uint64_t frameNumber); - - public: - GPUProfilerD3D12(); - ~GPUProfilerD3D12(); - - void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues); - - void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp); - - void Flip(IDXGISwapChain* swapChain); - - - // Interface implementation - ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; - - void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override - { - QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp); - } - - void Flip(void* swapChain) override - { - Flip(static_cast<IDXGISwapChain*>(swapChain)); - } - }; - - template <class T> void SafeRelease(T **ppT) - { - if (*ppT) - { - (*ppT)->Release(); - *ppT = NULL; - } - } - - void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues) - { - GPUProfilerD3D12* gpuProfiler = Memory::New<GPUProfilerD3D12>(); - gpuProfiler->InitDevice((ID3D12Device*)device, (ID3D12CommandQueue**)cmdQueues, numQueues); - Core::Get().InitGPUProfiler(gpuProfiler); - } - - GPUProfilerD3D12::GPUProfilerD3D12() : queryBuffer(nullptr), device(nullptr) - { - prevFrameStatistics = { 0 }; - } - - GPUProfilerD3D12::~GPUProfilerD3D12() - { - WaitForFrame(frameNumber - 1); - - for (NodePayload* payload : nodePayloads) - Memory::Delete(payload); - nodePayloads.clear(); - - for (Node* node : nodes) - Memory::Delete(node); - nodes.clear(); - - SafeRelease(&queryBuffer); - } - - void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues) - { - device = pDevice; - - uint32_t nodeCount = numCommandQueues; // device->GetNodeCount(); - - nodes.resize(nodeCount); - nodePayloads.resize(nodeCount); - - D3D12_HEAP_PROPERTIES heapDesc; - heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapDesc.CreationNodeMask = 0; - heapDesc.VisibleNodeMask = (1u << nodeCount) - 1u; - heapDesc.Type = D3D12_HEAP_TYPE_READBACK; - - D3D12_RESOURCE_DESC resourceDesc; - resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resourceDesc.Alignment = 0; - resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t); - resourceDesc.Height = 1; - resourceDesc.DepthOrArraySize = 1; - resourceDesc.MipLevels = 1; - resourceDesc.Format = DXGI_FORMAT_UNKNOWN; - resourceDesc.SampleDesc.Count = 1; - resourceDesc.SampleDesc.Quality = 0; - resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; - - OPTICK_CHECK(device->CreateCommittedResource( - &heapDesc, - D3D12_HEAP_FLAG_NONE, - &resourceDesc, - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&queryBuffer))); - - // Get Device Name - LUID adapterLUID = pDevice->GetAdapterLuid(); - - IDXGIFactory4* factory; - OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory))); - - IDXGIAdapter1* adapter; - factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter)); - - DXGI_ADAPTER_DESC1 desc; - adapter->GetDesc1(&desc); - - adapter->Release(); - factory->Release(); - - char deviceName[128] = { 0 }; - wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1); - - for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) - InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]); - } - - void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue) - { - GPUProfiler::InitNode(nodeName, nodeIndex); - - NodePayload* node = Memory::New<NodePayload>(); - nodePayloads[nodeIndex] = node; - node->commandQueue = pCmdQueue; - - D3D12_QUERY_HEAP_DESC queryHeapDesc; - queryHeapDesc.Count = MAX_QUERIES_COUNT; - queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - queryHeapDesc.NodeMask = 1u << nodeIndex; - OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&node->queryHeap))); - - OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&node->syncFence))); - - for (Frame& frame : node->frames) - { - OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator))); - OPTICK_CHECK(device->CreateCommandList(1u << nodeIndex, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList))); - OPTICK_CHECK(frame.commandList->Close()); - } - } - - void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp) - { - if (currentState == STATE_RUNNING) - { - uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); - context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, index); - } - } - - void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count) - { - if (count) - { - Node* node = nodes[currentNode]; - - D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) }; - void* pData = nullptr; - queryBuffer->Map(0, &range, &pData); - memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count); - queryBuffer->Unmap(0, 0); - - // Convert GPU timestamps => CPU Timestamps - for (uint32_t index = startIndex; index < startIndex + count; ++index) - *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); - } - } - - void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait) - { - OPTICK_EVENT(); - - NodePayload* payload = nodePayloads[currentNode]; - while (frameNumberToWait > payload->syncFence->GetCompletedValue()) - { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } - } - - void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain) - { - OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug); - - std::lock_guard<std::recursive_mutex> lock(updateLock); - - if (currentState == STATE_STARTING) - currentState = STATE_RUNNING; - - if (currentState == STATE_RUNNING) - { - Node& node = *nodes[currentNode]; - NodePayload& payload = *nodePayloads[currentNode]; - - uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; - uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; - - //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY]; - //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY]; - - QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; - QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; - - ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList; - ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator; - commandAllocator->Reset(); - commandList->Reset(commandAllocator, nullptr); - - if (EventData* frameEvent = currentFrame.frameEvent) - QueryTimestamp(commandList, &frameEvent->finish); - - // Generate GPU Frame event for the next frame - EventData& event = AddFrameEvent(); - QueryTimestamp(commandList, &event.start); - QueryTimestamp(commandList, &AddFrameTag().timestamp); - nextFrame.frameEvent = &event; - - uint32_t queryBegin = currentFrame.queryIndexStart; - uint32_t queryEnd = node.queryIndex; - - if (queryBegin != (uint32_t)-1) - { - OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!"); - currentFrame.queryIndexCount = queryEnd - queryBegin; - - uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT; - uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT; - - if (startIndex < finishIndex) - { - commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t)); - } - else - { - commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t)); - commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0); - } - } - - commandList->Close(); - - payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList); - payload.commandQueue->Signal(payload.syncFence, frameNumber); - - // Preparing Next Frame - // Try resolve timestamps for the current frame - if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount) - { - WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY); - - uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; - uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount; - ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); - if (resolveFinish > MAX_QUERIES_COUNT) - ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT); - } - - nextFrame.queryIndexStart = queryEnd; - nextFrame.queryIndexCount = 0; - - // Process VSync - DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; - HRESULT result = swapChain->GetFrameStatistics(¤tFrameStatistics); - if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount)) - { - EventData& data = AddVSyncEvent(); - data.start = prevFrameStatistics.SyncQPCTime.QuadPart; - data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; - } - prevFrameStatistics = currentFrameStatistics; - } - - ++frameNumber; - } - - GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex) - { - ClockSynchronization clock; - clock.frequencyCPU = GetHighPrecisionFrequency(); - nodePayloads[nodeIndex]->commandQueue->GetTimestampFrequency((uint64_t*)&clock.frequencyGPU); - nodePayloads[nodeIndex]->commandQueue->GetClockCalibration((uint64_t*)&clock.timestampGPU, (uint64_t*)&clock.timestampCPU); - return clock; - } - - GPUProfilerD3D12::NodePayload::~NodePayload() - { - SafeRelease(&queryHeap); - SafeRelease(&syncFence); - } - - void GPUProfilerD3D12::Frame::Shutdown() - { - SafeRelease(&commandAllocator); - SafeRelease(&commandList); - } -} - -#else -#include "optick_common.h" - -namespace Optick -{ - void InitGpuD3D12(void* /*device*/, void** /*cmdQueues*/, uint32_t /*numQueues*/) - { - OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!"); - } -} - -#endif //OPTICK_ENABLE_GPU_D3D12 -#endif //USE_OPTICK
\ No newline at end of file |