summary refs log tree commit diff stats
path: root/external/optick/optick_gpu.d3d12.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'external/optick/optick_gpu.d3d12.cpp')
-rw-r--r-- external/optick/optick_gpu.d3d12.cpp 382
1 files changed, 0 insertions, 382 deletions
diff --git a/external/optick/optick_gpu.d3d12.cpp b/external/optick/optick_gpu.d3d12.cpp
deleted file mode 100644
index 1ee4dd9..0000000
--- a/external/optick/optick_gpu.d3d12.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-#include "optick.config.h"
-#if USE_OPTICK
-#if OPTICK_ENABLE_GPU_D3D12
-
-#include "optick_common.h"
-#include "optick_memory.h"
-#include "optick_core.h"
-#include "optick_gpu.h"
-
-#include <atomic>
-#include <thread>
-
-#include <d3d12.h>
-#include <dxgi.h>
-#include <dxgi1_4.h>
-
-
-// Evaluates a D3D12/DXGI call exactly once and asserts through OPTICK_ASSERT that it returned S_OK.
-// The result is kept in a local so the call is still made if OPTICK_ASSERT expands to nothing;
-// the (void) cast silences the unused-variable warning in that case.
-// There is deliberately no ';' after while(false): the call site supplies it, which keeps the
-// macro usable as a single statement inside an unbraced if/else.
-#define OPTICK_CHECK(args) do { const HRESULT optickCheckResult = (args); (void)optickCheckResult; OPTICK_ASSERT(optickCheckResult == S_OK, "Failed check"); } while(false)
-
-namespace Optick
-{
- /// Direct3D 12 implementation of GPUProfiler.
- /// Timestamps are written with EndQuery on D3D12 command lists, resolved into a readback
- /// buffer during Flip(), and converted to CPU timestamps in ResolveTimestamps().
- class GPUProfilerD3D12 : public GPUProfiler
- {
-     /// Command recording objects for one in-flight frame slot (see NodePayload::frames).
-     struct Frame
-     {
-         ID3D12CommandAllocator* commandAllocator;
-         ID3D12GraphicsCommandList* commandList;
-
-         Frame() : commandAllocator(nullptr), commandList(nullptr)
-         {
-             Reset();
-         }
-
-         // The destructor releases both COM references; a copy would release them a second time.
-         Frame(const Frame&) = delete;
-         Frame& operator=(const Frame&) = delete;
-
-         void Reset()
-         {
-         }
-
-         /// Releases the allocator and the command list (safe to call repeatedly).
-         void Shutdown();
-
-         ~Frame()
-         {
-             Shutdown();
-         }
-     };
-
-     /// D3D12 objects kept per profiled node (one node per command queue passed to InitDevice).
-     struct NodePayload
-     {
-         ID3D12CommandQueue* commandQueue; // supplied by the caller; not released by this struct
-         ID3D12QueryHeap* queryHeap;       // timestamp query heap, released in the destructor
-         ID3D12Fence* syncFence;           // signaled with the frame number in Flip()
-         array<Frame, NUM_FRAMES_DELAY> frames;
-
-         NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {}
-
-         // Owns queryHeap/syncFence references; copying would lead to a double Release.
-         NodePayload(const NodePayload&) = delete;
-         NodePayload& operator=(const NodePayload&) = delete;
-
-         ~NodePayload();
-     };
-     vector<NodePayload*> nodePayloads;
-
-     ID3D12Resource* queryBuffer; // readback buffer that receives resolved timestamps
-     ID3D12Device* device;        // stored by InitDevice; never released by this class
-
-     // VSync Stats
-     DXGI_FRAME_STATISTICS prevFrameStatistics;
-
-     //void UpdateRange(uint32_t start, uint32_t finish)
-     /// Creates the query heap, fence and per-frame command lists for one node.
-     void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
-
-     /// Copies `count` resolved GPU timestamps starting at `startIndex` and converts them to CPU time.
-     void ResolveTimestamps(uint32_t startIndex, uint32_t count);
-
-     /// Blocks until the current node's fence has reached `frameNumber`.
-     void WaitForFrame(uint64_t frameNumber);
-
- public:
-     GPUProfilerD3D12();
-     ~GPUProfilerD3D12();
-
-     /// Stores the device, creates the readback buffer and initializes one node per command queue.
-     void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues);
-
-     /// Records a timestamp query on `context`; does nothing unless the profiler is running.
-     void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
-
-     /// Per-present bookkeeping: submits the resolve command list and reads back an older frame.
-     void Flip(IDXGISwapChain* swapChain);
-
-
-     // Interface implementation
-     ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;
-
-     /// Type-erased entry point; `context` must point to an ID3D12GraphicsCommandList.
-     void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
-     {
-         QueryTimestamp(static_cast<ID3D12GraphicsCommandList*>(context), outCpuTimestamp);
-     }
-
-     /// Type-erased entry point; `swapChain` must point to an IDXGISwapChain.
-     void Flip(void* swapChain) override
-     {
-         Flip(static_cast<IDXGISwapChain*>(swapChain));
-     }
- };
-
- /// Calls Release() on the object *ppT points to and resets that pointer to nullptr.
- /// Does nothing when ppT itself is null or when *ppT is already null, so it can be
- /// called repeatedly on the same pointer.
- template <class T> void SafeRelease(T **ppT)
- {
-     if (ppT != nullptr && *ppT != nullptr)
-     {
-         (*ppT)->Release();
-         *ppT = nullptr;
-     }
- }
-
- /// Creates the D3D12 GPU profiler, initializes it with the given device and command queues,
- /// and hands it to the Optick core.
- /// `device` is reinterpreted as ID3D12Device* and `cmdQueues` as an array of `numQueues`
- /// ID3D12CommandQueue* entries.
- void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues)
- {
-     GPUProfilerD3D12* profiler = Memory::New<GPUProfilerD3D12>();
-
-     ID3D12Device* d3dDevice = static_cast<ID3D12Device*>(device);
-     ID3D12CommandQueue** d3dQueues = reinterpret_cast<ID3D12CommandQueue**>(cmdQueues);
-     profiler->InitDevice(d3dDevice, d3dQueues, numQueues);
-
-     Core::Get().InitGPUProfiler(profiler);
- }
-
- /// Starts with no device and no readback buffer; the VSync statistics are zero-initialized.
- GPUProfilerD3D12::GPUProfilerD3D12()
-     : queryBuffer(nullptr)
-     , device(nullptr)
-     , prevFrameStatistics()
- {
- }
-
- /// Waits for the last flipped frame, then destroys all node payloads and nodes and
- /// releases the readback buffer. The device pointer is not released here.
- GPUProfilerD3D12::~GPUProfilerD3D12()
- {
-     // Only wait when there is something to wait for:
-     //  - frameNumber == 0 means Flip() never ran, and "frameNumber - 1" would wrap around
-     //    to a huge value that the fence can never reach;
-     //  - WaitForFrame() indexes nodePayloads[currentNode], which is out of range when
-     //    InitDevice() was never called.
-     // NOTE(review): frameNumber/currentNode are declared in the base class (not visible here).
-     if (frameNumber > 0 && currentNode < nodePayloads.size())
-         WaitForFrame(frameNumber - 1);
-
-     for (NodePayload* payload : nodePayloads)
-         Memory::Delete(payload);
-     nodePayloads.clear();
-
-     for (Node* node : nodes)
-         Memory::Delete(node);
-     nodes.clear();
-
-     SafeRelease(&queryBuffer);
- }
-
- /// Binds the profiler to a D3D12 device.
- /// Creates the shared readback buffer for resolved timestamps, looks up the adapter
- /// description to use as node name, and initializes one profiler node per command queue.
- /// @param pDevice           device used to create all profiler resources (stored, not AddRef'ed here)
- /// @param pCommandQueues    array of `numCommandQueues` queues, one per node
- /// @param numCommandQueues  number of entries in `pCommandQueues`
- void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues)
- {
-     device = pDevice;
-
-     uint32_t nodeCount = numCommandQueues; // device->GetNodeCount();
-
-     nodes.resize(nodeCount);
-     nodePayloads.resize(nodeCount);
-
-     D3D12_HEAP_PROPERTIES heapDesc;
-     heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
-     heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
-     heapDesc.CreationNodeMask = 0;
-     // One visibility bit per node. Shifting a 32-bit value by 32 or more is undefined
-     // behavior, so saturate to "all bits set" instead.
-     heapDesc.VisibleNodeMask = (nodeCount >= 32u) ? ~0u : ((1u << nodeCount) - 1u);
-     heapDesc.Type = D3D12_HEAP_TYPE_READBACK;
-
-     // Plain buffer with one 64-bit slot per query.
-     D3D12_RESOURCE_DESC resourceDesc;
-     resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
-     resourceDesc.Alignment = 0;
-     resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t);
-     resourceDesc.Height = 1;
-     resourceDesc.DepthOrArraySize = 1;
-     resourceDesc.MipLevels = 1;
-     resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
-     resourceDesc.SampleDesc.Count = 1;
-     resourceDesc.SampleDesc.Quality = 0;
-     resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
-     resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
-
-     OPTICK_CHECK(device->CreateCommittedResource(
-         &heapDesc,
-         D3D12_HEAP_FLAG_NONE,
-         &resourceDesc,
-         D3D12_RESOURCE_STATE_COPY_DEST,
-         nullptr,
-         IID_PPV_ARGS(&queryBuffer)));
-
-     // Get Device Name
-     // The name stays empty if any DXGI call fails; the lookup is cosmetic and must not
-     // dereference an uninitialized factory/adapter pointer.
-     char deviceName[128] = { 0 };
-
-     LUID adapterLUID = pDevice->GetAdapterLuid();
-
-     IDXGIFactory4* factory = nullptr;
-     OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)));
-     if (factory != nullptr)
-     {
-         IDXGIAdapter1* adapter = nullptr;
-         if (SUCCEEDED(factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter))) && adapter != nullptr)
-         {
-             DXGI_ADAPTER_DESC1 desc = {};
-             if (SUCCEEDED(adapter->GetDesc1(&desc)))
-                 wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1);
-
-             adapter->Release();
-         }
-         factory->Release();
-     }
-
-     for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex)
-         InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]);
- }
-
- /// Sets up one profiler node: registers it with the base profiler, then creates its
- /// timestamp query heap, its sync fence and a closed command list per frame slot.
- /// @param nodeName   display name forwarded to GPUProfiler::InitNode
- /// @param nodeIndex  index into nodes/nodePayloads; also selects the node mask bit
- /// @param pCmdQueue  queue that Flip() submits this node's command lists to
- void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue)
- {
-     GPUProfiler::InitNode(nodeName, nodeIndex);
-
-     NodePayload* payload = Memory::New<NodePayload>();
-     nodePayloads[nodeIndex] = payload;
-     payload->commandQueue = pCmdQueue;
-
-     // Single-bit mask identifying this node for heap and command list creation.
-     const uint32_t nodeMask = 1u << nodeIndex;
-
-     D3D12_QUERY_HEAP_DESC timestampHeapDesc;
-     timestampHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
-     timestampHeapDesc.Count = MAX_QUERIES_COUNT;
-     timestampHeapDesc.NodeMask = nodeMask;
-     OPTICK_CHECK(device->CreateQueryHeap(&timestampHeapDesc, IID_PPV_ARGS(&payload->queryHeap)));
-
-     OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&payload->syncFence)));
-
-     for (Frame& slot : payload->frames)
-     {
-         OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&slot.commandAllocator)));
-         OPTICK_CHECK(device->CreateCommandList(nodeMask, D3D12_COMMAND_LIST_TYPE_DIRECT, slot.commandAllocator, nullptr, IID_PPV_ARGS(&slot.commandList)));
-         // Flip() calls Reset() on the list before recording, so leave it closed here.
-         OPTICK_CHECK(slot.commandList->Close());
-     }
- }
-
- /// Records a GPU timestamp on `context` for the current node.
- /// The node hands out the query slot and remembers `outCpuTimestamp` for that slot;
- /// nothing is recorded unless the profiler is in STATE_RUNNING.
- void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp)
- {
-     if (currentState != STATE_RUNNING)
-         return;
-
-     const uint32_t querySlot = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
-     ID3D12QueryHeap* timestampHeap = nodePayloads[currentNode]->queryHeap;
-     context->EndQuery(timestampHeap, D3D12_QUERY_TYPE_TIMESTAMP, querySlot);
- }
-
- /// Reads `count` resolved GPU timestamps starting at slot `startIndex` from the readback
- /// buffer into the current node and converts each one to a CPU timestamp.
- /// Does nothing when `count` is zero or when the buffer cannot be mapped.
- void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count)
- {
-     if (count == 0)
-         return;
-
-     Node* node = nodes[currentNode];
-
-     // Byte range the CPU is going to read.
-     const D3D12_RANGE readRange = { sizeof(uint64_t) * startIndex, sizeof(uint64_t) * (startIndex + count) };
-     void* pData = nullptr;
-     // A failed Map leaves pData unusable; skip this batch instead of copying from it.
-     if (FAILED(queryBuffer->Map(0, &readRange, &pData)) || pData == nullptr)
-         return;
-
-     // The source is indexed from the mapped pointer by startIndex, exactly as before.
-     memcpy(&node->queryGpuTimestamps[startIndex], static_cast<const uint64_t*>(pData) + startIndex, sizeof(uint64_t) * count);
-
-     // Empty written range: the CPU only read from this readback buffer.
-     const D3D12_RANGE writtenRange = { 0, 0 };
-     queryBuffer->Unmap(0, &writtenRange);
-
-     // Convert GPU timestamps => CPU Timestamps
-     for (uint32_t index = startIndex; index < startIndex + count; ++index)
-         *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]);
- }
-
- /// Blocks the calling thread until the current node's fence has reached
- /// `frameNumberToWait`, polling the completed value once per millisecond.
- void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait)
- {
-     OPTICK_EVENT();
-
-     NodePayload* current = nodePayloads[currentNode];
-     for (;;)
-     {
-         if (current->syncFence->GetCompletedValue() >= frameNumberToWait)
-             break;
-
-         std::this_thread::sleep_for(std::chrono::milliseconds(1));
-     }
- }
-
- // Per-frame bookkeeping for the current node; takes updateLock for its whole duration.
- // While running it: closes the previous GPU frame event, opens the next one, resolves the
- // timestamp queries issued since the last call into queryBuffer, submits that work and
- // signals the node fence with frameNumber, reads back the timestamps of the frame slot
- // that is about to be reused, and records a VSync event from the swap chain statistics.
- // frameNumber is incremented on every call, whether or not the profiler is running.
- void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
- {
- OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
-
- std::lock_guard<std::recursive_mutex> lock(updateLock);
-
- // A pending start becomes active on the first Flip after it was requested.
- if (currentState == STATE_STARTING)
- currentState = STATE_RUNNING;
-
- if (currentState == STATE_RUNNING)
- {
- Node& node = *nodes[currentNode];
- NodePayload& payload = *nodePayloads[currentNode];
-
- // Frame slots are used round-robin, NUM_FRAMES_DELAY slots in total.
- uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
- uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
-
- //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY];
- //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY];
-
- QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
- QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
-
- // Re-open this slot's command list for recording (return values are not checked).
- ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList;
- ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator;
- commandAllocator->Reset();
- commandList->Reset(commandAllocator, nullptr);
-
- // Close the frame event opened by the previous Flip, if there is one.
- if (EventData* frameEvent = currentFrame.frameEvent)
- QueryTimestamp(commandList, &frameEvent->finish);
-
- // Generate GPU Frame event for the next frame
- EventData& event = AddFrameEvent();
- QueryTimestamp(commandList, &event.start);
- QueryTimestamp(commandList, &AddFrameTag().timestamp);
- nextFrame.frameEvent = &event;
-
- // Queries [queryBegin, queryEnd) were issued since this slot's start was recorded.
- uint32_t queryBegin = currentFrame.queryIndexStart;
- uint32_t queryEnd = node.queryIndex;
-
- // (uint32_t)-1 is treated as "no start index recorded for this slot yet".
- if (queryBegin != (uint32_t)-1)
- {
- OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!");
- currentFrame.queryIndexCount = queryEnd - queryBegin;
-
- // Query indices grow monotonically; the heap and buffer are addressed modulo MAX_QUERIES_COUNT.
- uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT;
- uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT;
-
- if (startIndex < finishIndex)
- {
- // Contiguous range: a single resolve is enough.
- commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t));
- }
- else
- {
- // Range wraps around the end of the heap: resolve the tail, then the head.
- commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t));
- commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0);
- }
- }
-
- commandList->Close();
-
- // Submit the recorded work, then signal the fence with this frame's number so
- // WaitForFrame() can tell when its timestamps are available.
- payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
- payload.commandQueue->Signal(payload.syncFence, frameNumber);
-
- // Preparing Next Frame
- // Try resolve timestamps for the current frame
- // nextFrame's slot still describes the frame recorded NUM_FRAMES_DELAY - 1 flips ago;
- // wait for that frame's fence value before reading its timestamps back.
- if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount)
- {
- WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY);
-
- // Same wrap handling as above: read the part up to the end of the buffer first,
- // then the remainder from slot 0.
- uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
- uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
- ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
- if (resolveFinish > MAX_QUERIES_COUNT)
- ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT);
- }
-
- // The next frame's queries start where this frame's ended.
- nextFrame.queryIndexStart = queryEnd;
- nextFrame.queryIndexCount = 0;
-
- // Process VSync
- // An event is added only when exactly one present happened since the previous Flip.
- DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
- HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
- if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount))
- {
- EventData& data = AddVSyncEvent();
- data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
- data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
- }
- prevFrameStatistics = currentFrameStatistics;
- }
-
- // Incremented even when the profiler is not running.
- ++frameNumber;
- }
-
- /// Builds the CPU/GPU clock correlation for one node: the CPU frequency from
- /// GetHighPrecisionFrequency(), and the GPU timestamp frequency plus a matching
- /// GPU/CPU timestamp pair from that node's command queue.
- GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex)
- {
-     ClockSynchronization sync;
-     sync.frequencyCPU = GetHighPrecisionFrequency();
-
-     ID3D12CommandQueue* queue = nodePayloads[nodeIndex]->commandQueue;
-     queue->GetTimestampFrequency((uint64_t*)&sync.frequencyGPU);
-     queue->GetClockCalibration((uint64_t*)&sync.timestampGPU, (uint64_t*)&sync.timestampCPU);
-
-     return sync;
- }
-
- /// Releases the query heap and the fence; the command queue pointer is left untouched.
- GPUProfilerD3D12::NodePayload::~NodePayload()
- {
-     if (queryHeap)
-     {
-         queryHeap->Release();
-         queryHeap = nullptr;
-     }
-
-     if (syncFence)
-     {
-         syncFence->Release();
-         syncFence = nullptr;
-     }
- }
-
- /// Releases the command allocator, then the command list, nulling each pointer so a
- /// second call (for example from the destructor) has nothing left to release.
- void GPUProfilerD3D12::Frame::Shutdown()
- {
-     if (commandAllocator)
-     {
-         commandAllocator->Release();
-         commandAllocator = nullptr;
-     }
-
-     if (commandList)
-     {
-         commandList->Release();
-         commandList = nullptr;
-     }
- }
-}
-
-#else
-#include "optick_common.h"
-
-namespace Optick
-{
- /// Stand-in used when OPTICK_ENABLE_GPU_D3D12 is off: ignores its arguments and
- /// reports the failure through OPTICK_FAILED.
- void InitGpuD3D12(void* device, void** cmdQueues, uint32_t numQueues)
- {
-     // Nothing can be initialized in this configuration; the arguments are unused.
-     (void)device;
-     (void)cmdQueues;
-     (void)numQueues;
-
-     OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!");
- }
-}
-
-#endif //OPTICK_ENABLE_GPU_D3D12
-#endif //USE_OPTICK
\ No newline at end of file