1 files changed, 117 insertions, 85 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a35f0c0d8..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -1,5 +1,5 @@
 // Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #include <algorithm>
@@ -18,7 +18,7 @@ namespace Pica {
 namespace Rasterizer {
 
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());
+    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
     u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
 
     // Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
 }
 
 static u32 GetDepth(int x, int y) {
-    u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 }
 
 static void SetDepth(int x, int y, u16 value) {
-    u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
     int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
     int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
 
+    auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
+
+    auto textures = registers.GetTextures();
+    auto tev_stages = registers.GetTevStages();
+
     // TODO: Not sure if looping through x first might be faster
     for (u16 y = min_y; y < max_y; y += 0x10) {
         for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
             if (w0 < 0 || w1 < 0 || w2 < 0)
                 continue;
 
+            auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
+                                                float24::FromFloat32(static_cast<float>(w1)),
+                                                float24::FromFloat32(static_cast<float>(w2)));
+            float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
+
             // Perspective correct attribute interpolation:
             // Attribute values cannot be calculated by simple linear interpolation since
             // they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
             //
             // The generalization to three vertices is straightforward in baricentric coordinates.
             auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
-                auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
-                                                 attr1 / v1.pos.w,
-                                                 attr2 / v2.pos.w);
-                auto w_inverse   = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
-                                                 float24::FromFloat32(1.f) / v1.pos.w,
-                                                 float24::FromFloat32(1.f) / v2.pos.w);
-                auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
-                                                             float24::FromFloat32(static_cast<float>(w1)),
-                                                             float24::FromFloat32(static_cast<float>(w2)));
-
+                auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
                 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
-                float24 interpolated_w_inverse   = Math::Dot(w_inverse,   baricentric_coordinates);
-                return interpolated_attr_over_w / interpolated_w_inverse;
+                return interpolated_attr_over_w * interpolated_w_inverse;
             };
 
             Math::Vec4<u8> primary_color{
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
             };
 
-            Math::Vec4<u8> texture_color{};
-            float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
-            float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
-            if (registers.texturing_enable) {
-                // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
-                // of which is composed of four 2x2 subtiles each of which is composed of four texels.
-                // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
-                // texels are laid out in a 2x2 subtile like this:
-                // 2 3
-                // 0 1
-                //
-                // The full 8x8 tile has the texels arranged like this:
-                //
-                // 42 43 46 47 58 59 62 63
-                // 40 41 44 45 56 57 60 61
-                // 34 35 38 39 50 51 54 55
-                // 32 33 36 37 48 49 52 53
-                // 10 11 14 15 26 27 30 31
-                // 08 09 12 13 24 25 28 29
-                // 02 03 06 07 18 19 22 23
-                // 00 01 04 05 16 17 20 21
-
-                // TODO: This is currently hardcoded for RGB8
-                u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
-
-                // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
-                // To be flexible in case different but similar patterns are used, we keep this
-                // somewhat inefficient code around for now.
-                int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32();
-                int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32();
-                int texel_index_within_tile = 0;
-                for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
-                    int sub_tile_width = 1 << block_size_index;
-                    int sub_tile_height = 1 << block_size_index;
-
-                    int sub_tile_index = (s & sub_tile_width) << block_size_index;
-                    sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
-                    texel_index_within_tile += sub_tile_index;
-                }
-
-                const int block_width = 8;
-                const int block_height = 8;
-
-                int coarse_s = (s / block_width) * block_width;
-                int coarse_t = (t / block_height) * block_height;
-
-                const int row_stride = registers.texture0.width * 3;
-                u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
-                texture_color.r() = source_ptr[2];
-                texture_color.g() = source_ptr[1];
-                texture_color.b() = source_ptr[0];
-                texture_color.a() = 0xFF;
-
-                DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
+            Math::Vec2<float24> uv[3];
+            uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
+            uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
+            uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
+            uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
+            uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
+            uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
+
+            Math::Vec4<u8> texture_color[3]{};
+            for (int i = 0; i < 3; ++i) {
+                const auto& texture = textures[i];
+                if (!texture.enabled)
+                    continue;
+
+                _dbg_assert_(HW_GPU, 0 != texture.config.address);
+
+                int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
+                int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
+                auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
+                    switch (mode) {
+                        case Regs::TextureConfig::ClampToEdge:
+                            val = std::max(val, 0);
+                            val = std::min(val, (int)size - 1);
+                            return val;
+
+                        case Regs::TextureConfig::Repeat:
+                            return (int)(((unsigned)val) % size);
+
+                        default:
+                            LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
+                            _dbg_assert_(HW_GPU, 0);
+                            return 0;
+                    }
+                };
+                s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);  // use this unit's own wrap mode/size, not texture0's
+                t = GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
+
+                u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
+                auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
+
+                texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
+                DebugUtils::DumpTexture(texture.config, texture_data);
             }
 
             // Texture environment - consists of 6 stages of color and alpha combining.
@@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
             // with some basic arithmetic. Alpha combiners can be configured separately but work
             // analogously.
             Math::Vec4<u8> combiner_output;
-            for (auto tev_stage : registers.GetTevStages()) {
+            for (const auto& tev_stage : tev_stages) {
                 using Source = Regs::TevStageConfig::Source;
                 using ColorModifier = Regs::TevStageConfig::ColorModifier;
                 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
                 using Operation = Regs::TevStageConfig::Operation;
 
-                auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
+                auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
                     switch (source) {
                     case Source::PrimaryColor:
-                        return primary_color.rgb();
+                        return primary_color;
 
                     case Source::Texture0:
-                        return texture_color.rgb();
+                        return texture_color[0];
+
+                    case Source::Texture1:
+                        return texture_color[1];
+
+                    case Source::Texture2:
+                        return texture_color[2];
 
                     case Source::Constant:
-                        return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
+                        return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
 
                     case Source::Previous:
-                        return combiner_output.rgb();
+                        return combiner_output;
 
                     default:
-                        ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
+                        LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
+                        _dbg_assert_(HW_GPU, 0);
                         return {};
                     }
                 };
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                         return primary_color.a();
 
                     case Source::Texture0:
-                        return texture_color.a();
+                        return texture_color[0].a();
+
+                    case Source::Texture1:
+                        return texture_color[1].a();
+
+                    case Source::Texture2:
+                        return texture_color[2].a();
 
                     case Source::Constant:
                         return tev_stage.const_a;
@@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                         return combiner_output.a();
 
                     default:
-                        ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
+                        LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
+                        _dbg_assert_(HW_GPU, 0);
                         return 0;
                     }
                 };
 
-                auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
+                auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
                     switch (factor)
                     {
                     case ColorModifier::SourceColor:
-                        return values;
+                        return values.rgb();
+
+                    case ColorModifier::SourceAlpha:
+                        return { values.a(), values.a(), values.a() };
+
                     default:
-                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
+                        _dbg_assert_(HW_GPU, 0);
                         return {};
                     }
                 };
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     switch (factor) {
                     case AlphaModifier::SourceAlpha:
                         return value;
+
+                    case AlphaModifier::OneMinusSourceAlpha:
+                        return 255 - value;
+
                     default:
-                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
+                        _dbg_assert_(HW_GPU, 0);
                         return 0;
                     }
                 };
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Modulate:
                         return ((input[0] * input[1]) / 255).Cast<u8>();
 
+                    case Operation::Add:
+                    {
+                        auto result = input[0] + input[1];
+                        result.r() = std::min(255, result.r());
+                        result.g() = std::min(255, result.g());
+                        result.b() = std::min(255, result.b());
+                        return result.Cast<u8>();
+                    }
+
+                    case Operation::Lerp:
+                        return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
+
                     default:
-                        ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
+                        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
+                        _dbg_assert_(HW_GPU, 0);
                         return {};
                     }
                 };
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Modulate:
                         return input[0] * input[1] / 255;
 
+                    case Operation::Add:
+                        return std::min(255, input[0] + input[1]);
+
+                    case Operation::Lerp:
+                        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
+
                     default:
-                        ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
+                        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
+                        _dbg_assert_(HW_GPU, 0);
                         return 0;
                     }
                 };