summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/shader/control_flow.cpp393
-rw-r--r--src/video_core/shader/control_flow.h55
-rw-r--r--src/video_core/shader/decode.cpp39
6 files changed, 477 insertions, 16 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index dd65cfe42..abdc74428 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -82,6 +82,8 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/shift.cpp"
"${VIDEO_CORE}/shader/decode/video.cpp"
"${VIDEO_CORE}/shader/decode/xmad.cpp"
+ "${VIDEO_CORE}/shader/control_flow.cpp"
+ "${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2554add28..2b4266f29 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/shift.cpp"
"${VIDEO_CORE}/shader/decode/video.cpp"
"${VIDEO_CORE}/shader/decode/xmad.cpp"
+ "${VIDEO_CORE}/shader/control_flow.cpp"
+ "${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6839abe71..cd32c65d3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -103,6 +103,8 @@ add_library(video_core STATIC
shader/decode/video.cpp
shader/decode/xmad.cpp
shader/decode/other.cpp
+ shader/control_flow.cpp
+ shader/control_flow.h
shader/decode.cpp
shader/node_helper.cpp
shader/node_helper.h
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..fcf22c7f2
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,393 @@
+
+#include <list>
+#include <map>
+#include <unordered_set>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/shader/control_flow.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+constexpr s32 unassigned_branch = -2;
+
+struct BlockBranchInfo {
+ Condition condition{};
+ s32 address{exit_branch};
+ bool kill{};
+ bool is_sync{};
+ bool is_brk{};
+};
+
+struct BlockInfo {
+ BlockInfo() {}
+ u32 start{};
+ u32 end{};
+ bool visited{};
+ BlockBranchInfo branch{};
+
+ bool IsInside(const u32 address) const {
+ return start <= address && address <= end;
+ }
+};
+
+struct Stamp {
+ Stamp() = default;
+ Stamp(u32 address, u32 target) : address{address}, target{target} {}
+ u32 address{};
+ u32 target{};
+ bool operator==(const Stamp& sb) const {
+ return std::tie(address, target) == std::tie(sb.address, sb.target);
+ }
+ bool operator<(const Stamp& sb) const {
+ return address < sb.address;
+ }
+ bool operator>(const Stamp& sb) const {
+ return address > sb.address;
+ }
+ bool operator<=(const Stamp& sb) const {
+ return address <= sb.address;
+ }
+ bool operator>=(const Stamp& sb) const {
+ return address >= sb.address;
+ }
+};
+
+struct CFGRebuildState {
+ explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
+ : program_code{program_code}, program_size{program_size} {
+ // queries.clear();
+ block_info.clear();
+ labels.clear();
+ visited_address.clear();
+ ssy_labels.clear();
+ pbk_labels.clear();
+ inspect_queries.clear();
+ }
+
+ std::vector<BlockInfo> block_info{};
+ std::list<u32> inspect_queries{};
+ // std::list<Query> queries{};
+ std::unordered_set<u32> visited_address{};
+ std::unordered_set<u32> labels{};
+ std::set<Stamp> ssy_labels;
+ std::set<Stamp> pbk_labels;
+ const ProgramCode& program_code;
+ const std::size_t program_size;
+};
+
+enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 };
+
+std::pair<BlockCollision, std::vector<BlockInfo>::iterator> TryGetBlock(CFGRebuildState& state,
+ u32 address) {
+ auto it = state.block_info.begin();
+ while (it != state.block_info.end()) {
+ if (it->start == address) {
+ return {BlockCollision::Found, it};
+ }
+ if (it->IsInside(address)) {
+ return {BlockCollision::Inside, it};
+ }
+ it++;
+ }
+ return {BlockCollision::None, it};
+}
+
+struct ParseInfo {
+ BlockBranchInfo branch_info{};
+ u32 end_address{};
+};
+
+BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
+ auto& it = state.block_info.emplace_back();
+ it.start = start;
+ it.end = end;
+ state.visited_address.insert(start);
+ return &it;
+}
+
+Pred GetPredicate(u32 index, bool negated) {
+ return static_cast<Pred>(index + (negated ? 8 : 0));
+}
+
+enum class ParseResult : u32 {
+ ControlCaught = 0,
+ BlockEnd = 1,
+ AbnormalFlow = 2,
+};
+
+ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) {
+
+ u32 offset = static_cast<u32>(address);
+ u32 end_address = static_cast<u32>(state.program_size - 10U) * 8U;
+
+ auto insert_label = ([](CFGRebuildState& state, u32 address) {
+ auto pair = state.labels.emplace(address);
+ if (pair.second) {
+ state.inspect_queries.push_back(address);
+ }
+ });
+
+ while (true) {
+ if (offset >= end_address) {
+ parse_info.branch_info.address = exit_branch;
+ break;
+ }
+ if (state.visited_address.count(offset) != 0) {
+ parse_info.branch_info.address = offset;
+ break;
+ }
+ const Instruction instr = {state.program_code[offset]};
+ const auto opcode = OpCode::Decode(instr);
+ if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
+ offset++;
+ continue;
+ }
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::EXIT: {
+ const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+ parse_info.branch_info.condition.predicate =
+ GetPredicate(pred_index, instr.negate_pred != 0);
+ if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ offset++;
+ continue;
+ }
+ const ConditionCode cc = instr.flow_condition_code;
+ parse_info.branch_info.condition.cc = cc;
+ if (cc == ConditionCode::F) {
+ offset++;
+ continue;
+ }
+ parse_info.branch_info.address = exit_branch;
+ parse_info.branch_info.kill = false;
+ parse_info.branch_info.is_sync = false;
+ parse_info.branch_info.is_brk = false;
+ parse_info.end_address = offset;
+
+ return ParseResult::ControlCaught;
+ }
+ case OpCode::Id::BRA: {
+ if (instr.bra.constant_buffer != 0) {
+ return ParseResult::AbnormalFlow;
+ }
+ const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+ parse_info.branch_info.condition.predicate =
+ GetPredicate(pred_index, instr.negate_pred != 0);
+ if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ offset++;
+ continue;
+ }
+ const ConditionCode cc = instr.flow_condition_code;
+ parse_info.branch_info.condition.cc = cc;
+ if (cc == ConditionCode::F) {
+ offset++;
+ continue;
+ }
+ u32 branch_offset = offset + instr.bra.GetBranchTarget();
+ if (branch_offset == 0) {
+ parse_info.branch_info.address = exit_branch;
+ } else {
+ parse_info.branch_info.address = branch_offset;
+ }
+ insert_label(state, branch_offset);
+ parse_info.branch_info.kill = false;
+ parse_info.branch_info.is_sync = false;
+ parse_info.branch_info.is_brk = false;
+ parse_info.end_address = offset;
+
+ return ParseResult::ControlCaught;
+ }
+ case OpCode::Id::SYNC: {
+ parse_info.branch_info.condition;
+ const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+ parse_info.branch_info.condition.predicate =
+ GetPredicate(pred_index, instr.negate_pred != 0);
+ if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ offset++;
+ continue;
+ }
+ const ConditionCode cc = instr.flow_condition_code;
+ parse_info.branch_info.condition.cc = cc;
+ if (cc == ConditionCode::F) {
+ offset++;
+ continue;
+ }
+ parse_info.branch_info.address = unassigned_branch;
+ parse_info.branch_info.kill = false;
+ parse_info.branch_info.is_sync = true;
+ parse_info.branch_info.is_brk = false;
+ parse_info.end_address = offset;
+
+ return ParseResult::ControlCaught;
+ }
+ case OpCode::Id::BRK: {
+ parse_info.branch_info.condition;
+ const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+ parse_info.branch_info.condition.predicate =
+ GetPredicate(pred_index, instr.negate_pred != 0);
+ if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ offset++;
+ continue;
+ }
+ const ConditionCode cc = instr.flow_condition_code;
+ parse_info.branch_info.condition.cc = cc;
+ if (cc == ConditionCode::F) {
+ offset++;
+ continue;
+ }
+ parse_info.branch_info.address = unassigned_branch;
+ parse_info.branch_info.kill = false;
+ parse_info.branch_info.is_sync = false;
+ parse_info.branch_info.is_brk = true;
+ parse_info.end_address = offset;
+
+ return ParseResult::ControlCaught;
+ }
+ case OpCode::Id::KIL: {
+ parse_info.branch_info.condition;
+ const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+ parse_info.branch_info.condition.predicate =
+ GetPredicate(pred_index, instr.negate_pred != 0);
+ if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ offset++;
+ continue;
+ }
+ const ConditionCode cc = instr.flow_condition_code;
+ parse_info.branch_info.condition.cc = cc;
+ if (cc == ConditionCode::F) {
+ offset++;
+ continue;
+ }
+ parse_info.branch_info.address = exit_branch;
+ parse_info.branch_info.kill = true;
+ parse_info.branch_info.is_sync = false;
+ parse_info.branch_info.is_brk = false;
+ parse_info.end_address = offset;
+
+ return ParseResult::ControlCaught;
+ }
+ case OpCode::Id::SSY: {
+ const u32 target = offset + instr.bra.GetBranchTarget();
+ insert_label(state, target);
+ state.ssy_labels.emplace(offset, target);
+ break;
+ }
+ case OpCode::Id::PBK: {
+ const u32 target = offset + instr.bra.GetBranchTarget();
+ insert_label(state, target);
+ state.pbk_labels.emplace(offset, target);
+ break;
+ }
+ default:
+ break;
+ }
+
+ offset++;
+ }
+ parse_info.branch_info.kill = false;
+ parse_info.branch_info.is_sync = false;
+ parse_info.branch_info.is_brk = false;
+ parse_info.end_address = offset - 1;
+ return ParseResult::BlockEnd;
+}
+
+bool TryInspectAddress(CFGRebuildState& state) {
+ if (state.inspect_queries.empty()) {
+ return false;
+ }
+ u32 address = state.inspect_queries.front();
+ state.inspect_queries.pop_front();
+ auto search_result = TryGetBlock(state, address);
+ BlockInfo* block_info;
+ switch (search_result.first) {
+ case BlockCollision::Found: {
+ return true;
+ break;
+ }
+ case BlockCollision::Inside: {
+ // This case is the tricky one:
+ // We need to Split the block in 2 sepprate blocks
+ auto it = search_result.second;
+ block_info = CreateBlockInfo(state, address, it->end);
+ it->end = address - 1;
+ block_info->branch = it->branch;
+ BlockBranchInfo forward_branch{};
+ forward_branch.address = address;
+ it->branch = forward_branch;
+ return true;
+ break;
+ }
+ default:
+ break;
+ }
+ ParseInfo parse_info;
+ ParseResult parse_result = ParseCode(state, address, parse_info);
+ if (parse_result == ParseResult::AbnormalFlow) {
+ // if it's the end of the program, end it safely
+ // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
+ return false;
+ }
+
+ block_info = CreateBlockInfo(state, address, parse_info.end_address);
+ block_info->branch = parse_info.branch_info;
+ if (parse_info.branch_info.condition.IsUnconditional()) {
+ return true;
+ }
+
+ u32 fallthrough_address = parse_info.end_address + 1;
+ state.inspect_queries.push_front(fallthrough_address);
+ return true;
+}
+
+bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
+ ShaderCharacteristics& result_out) {
+ CFGRebuildState state{program_code, program_size};
+ // Inspect Code and generate blocks
+ state.labels.clear();
+ state.labels.emplace(start_address);
+ state.inspect_queries.push_back(start_address);
+ while (!state.inspect_queries.empty()) {
+ if (!TryInspectAddress(state)) {
+ return false;
+ }
+ }
+ std::sort(state.block_info.begin(), state.block_info.end(),
+ [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+ // Remove unvisited blocks
+ result_out.blocks.clear();
+ result_out.decompilable = false;
+ result_out.start = start_address;
+ result_out.end = start_address;
+ for (auto& block : state.block_info) {
+ ShaderBlock new_block{};
+ new_block.start = block.start;
+ new_block.end = block.end;
+ new_block.branch.cond = block.branch.condition;
+ new_block.branch.kills = block.branch.kill;
+ new_block.branch.address = block.branch.address;
+ result_out.end = std::max(result_out.end, block.end);
+ result_out.blocks.push_back(new_block);
+ }
+ if (result_out.decompilable) {
+ return true;
+ }
+ auto back = result_out.blocks.begin();
+ auto next = std::next(back);
+ while (next != result_out.blocks.end()) {
+ if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
+ back->end = next->end;
+ next = result_out.blocks.erase(next);
+ continue;
+ }
+ back = next;
+ next++;
+ }
+ return true;
+}
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..16736d57a
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <cstring>
+#include <list>
+#include <optional>
+#include <vector>
+
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Pred;
+
+constexpr s32 exit_branch = -1;
+
+struct Condition {
+ Pred predicate{Pred::UnusedIndex};
+ ConditionCode cc{ConditionCode::T};
+
+ bool IsUnconditional() const {
+ return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T);
+ }
+};
+
+struct ShaderBlock {
+ ShaderBlock() {}
+ ShaderBlock(const ShaderBlock& sb) = default;
+ u32 start{};
+ u32 end{};
+ struct Branch {
+ Condition cond{};
+ bool kills{};
+ s32 address{};
+ bool operator==(const Branch& b) const {
+ return std::memcmp(this, &b, sizeof(Branch)) == 0;
+ }
+ } branch;
+ bool operator==(const ShaderBlock& sb) const {
+ return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0;
+ }
+};
+
+struct ShaderCharacteristics {
+ std::list<ShaderBlock> blocks;
+ bool decompilable{};
+ u32 start;
+ u32 end;
+};
+
+bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
+ ShaderCharacteristics& result_out);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..7f433c56b 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/control_flow.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
@@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
- std::set<u32> labels;
- const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
- if (exit_method != ExitMethod::AlwaysEnd) {
- UNREACHABLE_MSG("Program does not always end");
- }
-
- if (labels.empty()) {
- basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
+ ShaderCharacteristics shader_info{};
+ bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info);
+ if (can_proceed) {
+ coverage_begin = shader_info.start;
+ coverage_end = shader_info.end;
+ if (shader_info.decompilable) {
+ return;
+ }
+ // we can't decompile it, fallback to standard method
+ for (const auto& block : shader_info.blocks) {
+ basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
+ }
return;
}
-
- labels.insert(main_offset);
-
- for (const u32 label : labels) {
- const auto next_it = labels.lower_bound(label + 1);
- const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
-
- basic_blocks.insert({label, DecodeRange(label, next_label)});
+ LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
+
+ // Now we need to deal with an undecompilable shader. We need to brute force
+ // a shader that captures every position.
+ coverage_begin = shader_info.start;
+ const u32 shader_end = static_cast<u32>(MAX_PROGRAM_LENGTH);
+ coverage_end = shader_end;
+ for (u32 label = main_offset; label < shader_end; label++) {
+ basic_blocks.insert({label, DecodeRange(label, label + 1)});
}
+ return;
}
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {