diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 63 | ||||
-rw-r--r-- | src/video_core/gpu.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 19 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 20 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 80 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 3 |
6 files changed, 148 insertions, 38 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8d4ea3401..4eb507325 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -173,6 +173,13 @@ enum class SubOp : u64 { Min = 0x8, }; +enum class FloatRoundingOp : u64 { + None = 0, + Floor = 1, + Ceil = 2, + Trunc = 3, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -230,22 +237,19 @@ union Instruction { std::memcpy(&result, &imm, sizeof(imm)); return result; } - } alu; - union { - BitField<39, 5, u64> shift_amount; - BitField<20, 19, u64> immediate_low; - BitField<56, 1, u64> immediate_high; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - - s32 GetImmediate() const { - u32 immediate = static_cast<u32>(immediate_low | (immediate_high << 19)); + s32 GetSignedImm20_20() const { + u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); // Sign extend the 20-bit value. u32 mask = 1U << (20 - 1); return static_cast<s32>((immediate ^ mask) - mask); } + } alu; + union { + BitField<39, 5, u64> shift_amount; + BitField<48, 1, u64> negate_b; + BitField<49, 1, u64> negate_a; } iscadd; union { @@ -293,11 +297,20 @@ union Instruction { union { BitField<10, 2, Register::Size> size; - BitField<13, 1, u64> is_signed; + BitField<12, 1, u64> is_output_signed; + BitField<13, 1, u64> is_input_signed; BitField<41, 2, u64> selector; BitField<45, 1, u64> negate_a; BitField<49, 1, u64> abs_a; BitField<50, 1, u64> saturate_a; + + union { + BitField<39, 2, FloatRoundingOp> rounding; + } f2i; + + union { + BitField<39, 4, u64> rounding; + } f2f; } conversion; union { @@ -328,15 +341,16 @@ union Instruction { } texs; union { - BitField<20, 5, u64> target; + BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; s32 GetBranchTarget() const { // Sign extend the branch target offset - u32 mask = 1U << (5 - 1); + u32 mask = 1U << (24 - 1); u32 value = static_cast<u32>(target); - // The branch offset is relative to the next instruction, so add 1 to it. - return static_cast<s32>((value ^ mask) - mask) + 1; + // The branch offset is relative to the next instruction and is stored in bytes, so + // divide it by the size of an instruction and add 1 to it. + return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; } } bra; @@ -402,6 +416,9 @@ public: MOV_R, MOV_IMM, MOV32_IMM, + SHL_C, + SHL_R, + SHL_IMM, SHR_C, SHR_R, SHR_IMM, @@ -424,6 +441,7 @@ public: Trivial, Arithmetic, Logic, + Shift, ScaledAdd, Ffma, Flow, @@ -558,20 +576,23 @@ private: INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"), + INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), + INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), + INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"), INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"), + INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), + INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), + INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), + INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), + INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), + INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 9f8465444..5852b9619 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -20,6 +20,7 @@ enum class RenderTargetFormat : u32 { RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, RGBA8_SRGB = 0xD6, + R11G11B10_FLOAT = 0xE0, }; /// Returns the number of bytes per pixel of each rendertarget format. diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index d6048f639..9164d7f34 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -50,6 +50,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 @@ -60,8 +61,10 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported, or RGBA16F which is type FLOAT - ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F); + // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are + // type FLOAT + ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || + pixel_format == PixelFormat::R11FG11FB10F); return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats @@ -110,11 +113,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, + MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, @@ -127,6 +131,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<false, PixelFormat::A1B5G5R5>, MortonCopy<false, PixelFormat::R8>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::R11FG11FB10F>, // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 6f08678ab..0f43e863d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -60,10 +60,11 @@ struct SurfaceParams { A1B5G5R5 = 3, R8 = 4, RGBA16F = 5, - DXT1 = 6, - DXT23 = 7, - DXT45 = 8, - DXN1 = 9, // This is also known as BC4 + R11FG11FB10F = 6, + DXT1 = 7, + DXT23 = 8, + DXT45 = 9, + DXN1 = 10, // This is also known as BC4 Max, Invalid = 255, @@ -104,7 +105,8 @@ struct SurfaceParams { 1, // A2B10G10R10 1, // A1B5G5R5 1, // R8 - 2, // RGBA16F + 1, // RGBA16F + 1, // R11FG11FB10F 4, // DXT1 4, // DXT23 4, // DXT45 @@ -129,6 +131,7 @@ struct SurfaceParams { 16, // A1B5G5R5 8, // R8 64, // RGBA16F + 32, // R11FG11FB10F 64, // DXT1 128, // DXT23 128, // DXT45 @@ -151,6 +154,8 @@ struct SurfaceParams { return PixelFormat::A2B10G10R10; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::R11G11B10_FLOAT: + return PixelFormat::R11FG11FB10F; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -182,6 +187,8 @@ struct SurfaceParams { return PixelFormat::R8; case Tegra::Texture::TextureFormat::R16_G16_B16_A16: return PixelFormat::RGBA16F; + case Tegra::Texture::TextureFormat::BF10GF11RF11: + return PixelFormat::R11FG11FB10F; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; case Tegra::Texture::TextureFormat::DXT23: @@ -211,6 +218,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::R8; case PixelFormat::RGBA16F: return Tegra::Texture::TextureFormat::R16_G16_B16_A16; + case PixelFormat::R11FG11FB10F: + return Tegra::Texture::TextureFormat::BF10GF11RF11; case PixelFormat::DXT1: return Tegra::Texture::TextureFormat::DXT1; case PixelFormat::DXT23: @@ -243,6 +252,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RGBA16_FLOAT: + case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return ComponentType::Float; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index eb8d37c9b..4a41e7798 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -893,6 +893,35 @@ private: } break; } + + case OpCode::Type::Shift: { + std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false); + std::string op_b; + + if (instr.is_b_imm) { + op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; + } else { + if (instr.is_b_gpr) { + op_b += regs.GetRegisterAsInteger(instr.gpr20); + } else { + op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); + } + } + + switch (opcode->GetId()) { + case OpCode::Id::SHL_C: + case OpCode::Id::SHL_R: + case OpCode::Id::SHL_IMM: + regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); + break; + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + break; + } + case OpCode::Type::ScaledAdd: { std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); @@ -902,12 +931,12 @@ private: std::string op_b = instr.iscadd.negate_b ? "-" : ""; if (instr.is_b_imm) { - op_b += '(' + std::to_string(instr.iscadd.GetImmediate()) + ')'; + op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; } else { if (instr.is_b_gpr) { op_b += regs.GetRegisterAsInteger(instr.gpr20); } else { - op_b += regs.GetUniform(instr.uniform, instr.gpr0); + op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer); } } @@ -962,18 +991,20 @@ private: ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); std::string op_a = - regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); + regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } - regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, + 1); break; } case OpCode::Id::I2F_R: { + ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); std::string op_a = - regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed); + regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; @@ -983,6 +1014,8 @@ private: break; } case OpCode::Id::F2F_R: { + // TODO(Subv): Implement rounding operations. + ASSERT_MSG(instr.conversion.f2f.rounding == 0, "Unimplemented rounding operation"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); if (instr.conversion.abs_a) { @@ -992,6 +1025,43 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; } + case OpCode::Id::F2I_R: { + std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + + if (instr.conversion.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + using Tegra::Shader::FloatRoundingOp; + switch (instr.conversion.f2i.rounding) { + case FloatRoundingOp::None: + break; + case FloatRoundingOp::Floor: + op_a = "floor(" + op_a + ')'; + break; + case FloatRoundingOp::Ceil: + op_a = "ceil(" + op_a + ')'; + break; + case FloatRoundingOp::Trunc: + op_a = "trunc(" + op_a + ')'; + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}", + static_cast<u32>(instr.conversion.f2i.rounding.Value())); + UNREACHABLE(); + break; + } + + if (instr.conversion.is_output_signed) { + op_a = "int(" + op_a + ')'; + } else { + op_a = "uint(" + op_a + ')'; + } + + regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, + 1); + break; + } default: { NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName()); UNREACHABLE(); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 2d2af5554..7bf9c4c4b 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -55,6 +55,7 @@ u32 BytesPerPixel(TextureFormat format) { return 16; case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: + case TextureFormat::BF10GF11RF11: return 4; case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: @@ -92,6 +93,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::B5G6R5: case TextureFormat::R8: case TextureFormat::R16_G16_B16_A16: + case TextureFormat::BF10GF11RF11: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); break; @@ -118,6 +120,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::BF10GF11RF11: // TODO(Subv): For the time being just forward the same data without any decoding. rgba_data = texture_data; break; |