6 files changed, 148 insertions, 38 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8d4ea3401..4eb507325 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -173,6 +173,13 @@ enum class SubOp : u64 {
     Min = 0x8,
 };
 
+enum class FloatRoundingOp : u64 { // Rounding selector decoded from the 2-bit f2i.rounding field
+    None = 0,  // F2I_R applies no rounding function before its int()/uint() cast
+    Floor = 1, // F2I_R wraps the operand in GLSL floor()
+    Ceil = 2,  // F2I_R wraps the operand in GLSL ceil()
+    Trunc = 3, // F2I_R wraps the operand in GLSL trunc()
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -230,22 +237,19 @@ union Instruction {
             std::memcpy(&result, &imm, sizeof(imm));
             return result;
         }
-    } alu;
 
-    union {
-        BitField<39, 5, u64> shift_amount;
-        BitField<20, 19, u64> immediate_low;
-        BitField<56, 1, u64> immediate_high;
-        BitField<48, 1, u64> negate_b;
-        BitField<49, 1, u64> negate_a;
-
-        s32 GetImmediate() const {
-            u32 immediate = static_cast<u32>(immediate_low | (immediate_high << 19));
+        s32 GetSignedImm20_20() const { // Returns the 20-bit immediate as a sign-extended s32
+            u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); // negate_imm -> bit 19
             // Sign extend the 20-bit value.
             u32 mask = 1U << (20 - 1);
             return static_cast<s32>((immediate ^ mask) - mask);
         }
+    } alu;
 
+    union {
+        BitField<39, 5, u64> shift_amount;
+        BitField<48, 1, u64> negate_b;
+        BitField<49, 1, u64> negate_a;
     } iscadd;
 
     union {
@@ -293,11 +297,20 @@ union Instruction {
 
     union {
         BitField<10, 2, Register::Size> size;
-        BitField<13, 1, u64> is_signed;
+        BitField<12, 1, u64> is_output_signed;
+        BitField<13, 1, u64> is_input_signed;
         BitField<41, 2, u64> selector;
         BitField<45, 1, u64> negate_a;
         BitField<49, 1, u64> abs_a;
         BitField<50, 1, u64> saturate_a;
+
+        union {
+            BitField<39, 2, FloatRoundingOp> rounding;
+        } f2i;
+
+        union {
+            BitField<39, 4, u64> rounding;
+        } f2f;
     } conversion;
 
     union {
@@ -328,15 +341,16 @@ union Instruction {
     } texs;
 
     union {
-        BitField<20, 5, u64> target;
+        BitField<20, 24, u64> target;
         BitField<5, 1, u64> constant_buffer;
 
         s32 GetBranchTarget() const {
             // Sign extend the branch target offset
-            u32 mask = 1U << (5 - 1);
+            u32 mask = 1U << (24 - 1); // Sign bit of the 24-bit target field
             u32 value = static_cast<u32>(target);
-            // The branch offset is relative to the next instruction, so add 1 to it.
-            return static_cast<s32>((value ^ mask) - mask) + 1;
+            // Offset is in bytes from the next instruction; divide as s32 so negatives survive.
+            const s32 byte_offset = static_cast<s32>((value ^ mask) - mask);
+            return byte_offset / static_cast<s32>(sizeof(Instruction)) + 1; // +1: next instruction
         }
     } bra;
 
@@ -402,6 +416,9 @@ public:
         MOV_R,
         MOV_IMM,
         MOV32_IMM,
+        SHL_C,
+        SHL_R,
+        SHL_IMM,
         SHR_C,
         SHR_R,
         SHR_IMM,
@@ -424,6 +441,7 @@ public:
         Trivial,
         Arithmetic,
         Logic,
+        Shift,
         ScaledAdd,
         Ffma,
         Flow,
@@ -558,20 +576,23 @@ private:
             INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
             INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
             INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
-            INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
-            INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
-            INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
+            INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
+            INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
+            INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
             INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
-            INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
-            INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
-            INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
             INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
             INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+            INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
+            INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
+            INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
+            INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
+            INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
+            INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
             INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
             INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
             INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 9f8465444..5852b9619 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -20,6 +20,7 @@ enum class RenderTargetFormat : u32 {
     RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
     RGBA8_SRGB = 0xD6,
+    R11G11B10_FLOAT = 0xE0,
 };
 
 /// Returns the number of bytes per pixel of each rendertarget format.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index d6048f639..9164d7f34 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -50,6 +50,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                // A1B5G5R5
     {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                   // R8
     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                // RGBA16F
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},        // R11FG11FB10F
     {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
@@ -60,8 +61,10 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
     if (type == SurfaceType::ColorTexture) {
         ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        // For now only UNORM components are supported, or RGBA16F which is type FLOAT
-        ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
+        // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
+        // type FLOAT
+        ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
+               pixel_format == PixelFormat::R11FG11FB10F);
         return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
         // TODO(Subv): Implement depth formats
@@ -110,11 +113,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
                                      Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
-        MortonCopy<true, PixelFormat::ABGR8>,       MortonCopy<true, PixelFormat::B5G6R5>,
-        MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
-        MortonCopy<true, PixelFormat::R8>,          MortonCopy<true, PixelFormat::RGBA16F>,
-        MortonCopy<true, PixelFormat::DXT1>,        MortonCopy<true, PixelFormat::DXT23>,
-        MortonCopy<true, PixelFormat::DXT45>,       MortonCopy<true, PixelFormat::DXN1>,
+        MortonCopy<true, PixelFormat::ABGR8>,        MortonCopy<true, PixelFormat::B5G6R5>,
+        MortonCopy<true, PixelFormat::A2B10G10R10>,  MortonCopy<true, PixelFormat::A1B5G5R5>,
+        MortonCopy<true, PixelFormat::R8>,           MortonCopy<true, PixelFormat::RGBA16F>,
+        MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
+        MortonCopy<true, PixelFormat::DXT23>,        MortonCopy<true, PixelFormat::DXT45>,
+        MortonCopy<true, PixelFormat::DXN1>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -127,6 +131,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
         MortonCopy<false, PixelFormat::A1B5G5R5>,
         MortonCopy<false, PixelFormat::R8>,
         MortonCopy<false, PixelFormat::RGBA16F>,
+        MortonCopy<false, PixelFormat::R11FG11FB10F>,
         // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
         nullptr,
         nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 6f08678ab..0f43e863d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -60,10 +60,11 @@ struct SurfaceParams {
         A1B5G5R5 = 3,
         R8 = 4,
         RGBA16F = 5,
-        DXT1 = 6,
-        DXT23 = 7,
-        DXT45 = 8,
-        DXN1 = 9, // This is also known as BC4
+        R11FG11FB10F = 6,
+        DXT1 = 7,
+        DXT23 = 8,
+        DXT45 = 9,
+        DXN1 = 10, // This is also known as BC4
 
         Max,
         Invalid = 255,
@@ -104,7 +105,8 @@ struct SurfaceParams {
             1, // A2B10G10R10
             1, // A1B5G5R5
             1, // R8
-            2, // RGBA16F
+            1, // RGBA16F
+            1, // R11FG11FB10F
             4, // DXT1
             4, // DXT23
             4, // DXT45
@@ -129,6 +131,7 @@ struct SurfaceParams {
             16,  // A1B5G5R5
             8,   // R8
             64,  // RGBA16F
+            32,  // R11FG11FB10F
             64,  // DXT1
             128, // DXT23
             128, // DXT45
@@ -151,6 +154,8 @@ struct SurfaceParams {
             return PixelFormat::A2B10G10R10;
         case Tegra::RenderTargetFormat::RGBA16_FLOAT:
             return PixelFormat::RGBA16F;
+        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+            return PixelFormat::R11FG11FB10F;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -182,6 +187,8 @@ struct SurfaceParams {
             return PixelFormat::R8;
         case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
             return PixelFormat::RGBA16F;
+        case Tegra::Texture::TextureFormat::BF10GF11RF11:
+            return PixelFormat::R11FG11FB10F;
         case Tegra::Texture::TextureFormat::DXT1:
             return PixelFormat::DXT1;
         case Tegra::Texture::TextureFormat::DXT23:
@@ -211,6 +218,8 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::R8;
         case PixelFormat::RGBA16F:
             return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
+        case PixelFormat::R11FG11FB10F:
+            return Tegra::Texture::TextureFormat::BF10GF11RF11;
         case PixelFormat::DXT1:
             return Tegra::Texture::TextureFormat::DXT1;
         case PixelFormat::DXT23:
@@ -243,6 +252,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
             return ComponentType::UNorm;
         case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
             return ComponentType::Float;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index eb8d37c9b..4a41e7798 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -893,6 +893,35 @@ private:
             }
             break;
         }
+
+        case OpCode::Type::Shift: { // Emits GLSL for shift opcodes; only SHL_* is implemented
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
+            std::string op_b;
+
+            if (instr.is_b_imm) { // Operand B is an immediate, a register (gpr20) or a uniform
+                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += regs.GetRegisterAsInteger(instr.gpr20);
+                } else {
+                    op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
+                }
+            }
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::SHL_C:
+            case OpCode::Id::SHL_R:
+            case OpCode::Id::SHL_IMM: // All three SHL encodings share one "a << b" expression
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
+                break;
+            default: { // NOTE(review): SHR_* is also decoded as Type::Shift and lands here
+                NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+
         case OpCode::Type::ScaledAdd: {
             std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
 
@@ -902,12 +931,12 @@ private:
             std::string op_b = instr.iscadd.negate_b ? "-" : "";
 
             if (instr.is_b_imm) {
-                op_b += '(' + std::to_string(instr.iscadd.GetImmediate()) + ')';
+                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
             } else {
                 if (instr.is_b_gpr) {
                     op_b += regs.GetRegisterAsInteger(instr.gpr20);
                 } else {
-                    op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+                    op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
                 }
             }
 
@@ -962,18 +991,20 @@ private:
                 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
 
                 std::string op_a =
-                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
+                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
 
                 if (instr.conversion.abs_a) {
                     op_a = "abs(" + op_a + ')';
                 }
 
-                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+                                          1);
                 break;
             }
             case OpCode::Id::I2F_R: {
+                ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
                 std::string op_a =
-                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
+                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
 
                 if (instr.conversion.abs_a) {
                     op_a = "abs(" + op_a + ')';
@@ -983,6 +1014,8 @@ private:
                 break;
             }
             case OpCode::Id::F2F_R: {
+                // TODO(Subv): Implement rounding operations.
+                ASSERT_MSG(instr.conversion.f2f.rounding == 0, "Unimplemented rounding operation");
                 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
 
                 if (instr.conversion.abs_a) {
@@ -992,6 +1025,43 @@ private:
                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
                 break;
             }
+            case OpCode::Id::F2I_R: { // Converts the float in gpr20 to an integer stored in gpr0
+                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+
+                if (instr.conversion.abs_a) { // abs() is applied before any rounding
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                using Tegra::Shader::FloatRoundingOp;
+                switch (instr.conversion.f2i.rounding) {
+                case FloatRoundingOp::None: // NOTE(review): only the cast below rounds; confirm
+                    break;
+                case FloatRoundingOp::Floor:
+                    op_a = "floor(" + op_a + ')';
+                    break;
+                case FloatRoundingOp::Ceil:
+                    op_a = "ceil(" + op_a + ')';
+                    break;
+                case FloatRoundingOp::Trunc:
+                    op_a = "trunc(" + op_a + ')';
+                    break;
+                default: // Not reachable: the 2-bit field only encodes the four cases above
+                    NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
+                                   static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                    UNREACHABLE();
+                    break;
+                }
+
+                if (instr.conversion.is_output_signed) { // Cast type follows the output sign bit
+                    op_a = "int(" + op_a + ')';
+                } else {
+                    op_a = "uint(" + op_a + ')';
+                }
+
+                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+                                          1);
+                break;
+            }
             default: {
                 NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
                 UNREACHABLE();
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2d2af5554..7bf9c4c4b 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -55,6 +55,7 @@ u32 BytesPerPixel(TextureFormat format) {
         return 16;
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
+    case TextureFormat::BF10GF11RF11:
         return 4;
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
@@ -92,6 +93,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
     case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::BF10GF11RF11:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;
@@ -118,6 +120,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
+    case TextureFormat::BF10GF11RF11:
         // TODO(Subv): For the time being just forward the same data without any decoding.
         rgba_data = texture_data;
         break;