From 414b0741c445a7960f9ad1ee4a5672f8af4760db Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 8 Mar 2015 12:05:17 -0500
Subject: GPU: Implemented more depth buffer formats.

This fixes the horizontal lines in Picross E, Cubic Ninja, Cave Story 3D and possibly others
---
 src/core/hw/gpu.cpp           |  4 +--
 src/core/hw/gpu.h             |  6 ++---
 src/video_core/color.h        | 57 ++++++++++++++++++++++++++++++++++++++++
 src/video_core/pica.h         |  7 +++++
 src/video_core/rasterizer.cpp | 60 ++++++++++++++++++++++++++++++++++++-------
 5 files changed, 120 insertions(+), 14 deletions(-)

(limited to 'src')
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 424ce2ca7..b7102b874 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
             if (config.fill_24bit) {
                 // fill with 24-bit values
                 for (u8* ptr = start; ptr < end; ptr += 3) {
-                    ptr[0] = config.value_24bit_b;
+                    ptr[0] = config.value_24bit_r;
                     ptr[1] = config.value_24bit_g;
-                    ptr[2] = config.value_24bit_r;
+                    ptr[2] = config.value_24bit_b;
                 }
             } else if (config.fill_32bit) {
                 // fill with 32-bit values
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 737b1e968..5ca4a5450 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -100,10 +100,10 @@ struct Regs {
             // Set to 1 upon completion.
             BitField<0, 1, u32> finished;
 
-            // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
+            // If both of these bits are unset, then it will fill the memory with a 16 bit value
+            // 1: fill with 24-bit wide values
             BitField<8, 1, u32> fill_24bit;
-
-            // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
+            // 1: fill with 32-bit wide values
             BitField<9, 1, u32> fill_32bit;
         };
 
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35da901f2..35b56efc0 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -100,6 +100,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
         Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF) };
 }
 
+/**
+ * Decode a depth value stored in D16 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline const u32 DecodeD16(const u8* bytes) {
+    return *reinterpret_cast<const u16_le*>(bytes);
+}
+
+/**
+ * Decode a depth value stored in D24 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as an u32
+ */
+inline const u32 DecodeD24(const u8* bytes) {
+    return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
+}
+
+/**
+ * Decode a depth value and a stencil value stored in D24S8 format
+ * @param bytes Pointer to encoded source values
+ * @return Resulting values stored as a Math::Vec2 (depth first, stencil second)
+ */
+inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+    return { static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3] };
+}
+
 /**
  * Encode a color as RGBA8 format
  * @param color Source color to encode
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
         (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
 }
 
+/**
+ * Encode a depth value as D16 format
+ * @param value Source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD16(u32 value, u8* bytes) {
+    *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
+}
+
+/**
+ * Encode a depth value as D24 format
+ * @param value Source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24(u32 value, u8* bytes) {
+    bytes[0] = value & 0xFF;
+    bytes[1] = (value >> 8) & 0xFF;
+    bytes[2] = (value >> 16) & 0xFF;
+}
+
+/**
+ * Encode depth and stencil values as D24S8 format
+ * @param depth Source depth values to encode
+ * @param stencil Source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
+    *reinterpret_cast<u32_le*>(bytes) = (static_cast<u32>(stencil) << 24) | (depth & 0xFFFFFF);
+}
+
 } // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b14de9278..6549693f5 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -418,6 +418,13 @@ struct Regs {
             RGBA4    = 4,
         };
 
+        enum DepthFormat : u32 {
+            D16     = 0,
+        
+            D24     = 2,
+            D24S8   = 3
+        };
+
         INSERT_PADDING_WORDS(0x6);
 
         u32 depth_format;
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5861c1926..dc32128c6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
     }
 
     return {};
- }
+}
 
 static u32 GetDepth(int x, int y) {
     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,65 @@ static u32 GetDepth(int x, int y) {
     y = (registers.framebuffer.height - y);
     
     const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
 
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+    switch (registers.framebuffer.depth_format) {
+        case registers.framebuffer.D16:
+        {
+            u32 stride = registers.framebuffer.width * 2;
+            return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+        }
+        case registers.framebuffer.D24:
+        {
+            u32 stride = registers.framebuffer.width * 3;
+            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
+            return Color::DecodeD24(address);
+        }
+        case registers.framebuffer.D24S8:
+        {
+            u32 stride = registers.framebuffer.width * 4;
+            return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            return 0;
+    }
 }
 
-static void SetDepth(int x, int y, u16 value) {
+static void SetDepth(int x, int y, u32 value) {
     const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
     u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
 
     y = (registers.framebuffer.height - y);
 
     const u32 coarse_y = y & ~7;
-    u32 stride = registers.framebuffer.width * 2;
 
-    // Assuming 16-bit depth buffer format until actual format handling is implemented
-    *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
+    switch (registers.framebuffer.depth_format) {
+        case registers.framebuffer.D16:
+        {
+            u32 stride = registers.framebuffer.width * 2;
+            Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+            break;
+        }
+        case registers.framebuffer.D24:
+        {
+            u32 stride = registers.framebuffer.width * 3;
+            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
+            Color::EncodeD24(value, address);
+            break;
+        }
+        case registers.framebuffer.D24S8:
+        {
+            u32 stride = registers.framebuffer.width * 4;
+            // TODO(Subv): Implement the stencil buffer
+            Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            UNIMPLEMENTED();
+            break;
+    }
 }
 
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +637,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
                             v1.screenpos[2].ToFloat32() * w1 +
                             v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-                u16 ref_z = GetDepth(x >> 4, y >> 4);
+                u32 ref_z = GetDepth(x >> 4, y >> 4);
 
                 bool pass = false;
 
-- 
cgit v1.2.3


From 155cc80e3b39f51cb75c6ab2bf9203fbfe444be3 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 8 Mar 2015 20:07:02 -0500
Subject: Frontend/Qt: Allow the framebuffer widget to inspect the depth buffer

---
 src/citra_qt/debugger/graphics_framebuffer.cpp | 63 ++++++++++++++++++++++++--
 src/citra_qt/debugger/graphics_framebuffer.h   |  8 +++-
 2 files changed, 66 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 5bd6c0235..2985cadb1 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -27,6 +27,7 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
 
     framebuffer_source_list = new QComboBox;
     framebuffer_source_list->addItem(tr("Active Render Target"));
+    framebuffer_source_list->addItem(tr("Active Depth Buffer"));
     framebuffer_source_list->addItem(tr("Custom"));
     framebuffer_source_list->setCurrentIndex(static_cast<int>(framebuffer_source));
 
@@ -49,6 +50,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
     framebuffer_format_control->addItem(tr("RGB5A1"));
     framebuffer_format_control->addItem(tr("RGB565"));
     framebuffer_format_control->addItem(tr("RGBA4"));
+    framebuffer_format_control->addItem(tr("D16"));
+    framebuffer_format_control->addItem(tr("D24"));
+    framebuffer_format_control->addItem(tr("D24S8"));
 
     // TODO: This QLabel should shrink the image to the available space rather than just expanding...
     framebuffer_picture_label = new QLabel;
@@ -173,7 +177,6 @@ void GraphicsFramebufferWidget::OnUpdate()
         // TODO: Store a reference to the registers in the debug context instead of accessing them directly...
 
         auto framebuffer = Pica::registers.framebuffer;
-        using Framebuffer = decltype(framebuffer);
 
         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
@@ -184,6 +187,18 @@ void GraphicsFramebufferWidget::OnUpdate()
         break;
     }
 
+    case Source::DepthBuffer:
+    {
+        auto framebuffer = Pica::registers.framebuffer;
+
+        framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
+        framebuffer_width = framebuffer.GetWidth();
+        framebuffer_height = framebuffer.GetHeight();
+        framebuffer_format = Format::D16;
+
+        break;
+    }
+
     case Source::Custom:
     {
         // Keep user-specified values
@@ -197,15 +212,16 @@ void GraphicsFramebufferWidget::OnUpdate()
 
     // TODO: Implement a good way to visualize alpha components!
     // TODO: Unify this decoding code with the texture decoder
-    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format));
+    u32 bytes_per_pixel = GraphicsFramebufferWidget::BytesPerPixel(framebuffer_format);
 
     QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
-    u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
+    u8* buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
+
     for (unsigned int y = 0; y < framebuffer_height; ++y) {
         for (unsigned int x = 0; x < framebuffer_width; ++x) {
             const u32 coarse_y = y & ~7;
             u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
-            const u8* pixel = color_buffer + offset;
+            const u8* pixel = buffer + offset;
             Math::Vec4<u8> color = { 0, 0, 0, 0 };
 
             switch (framebuffer_format) {
@@ -224,6 +240,29 @@ void GraphicsFramebufferWidget::OnUpdate()
             case Format::RGBA4:
                 color = Color::DecodeRGBA4(pixel);
                 break;
+            case Format::D16:
+            {
+                u32 data = Color::DecodeD16(pixel);
+                color.r() = data & 0xFF;
+                color.g() = (data >> 8) & 0xFF;
+                break;
+            }
+            case Format::D24:
+            {
+                u32 data = Color::DecodeD24(pixel);
+                color.r() = data & 0xFF;
+                color.g() = (data >> 8) & 0xFF;
+                color.b() = (data >> 16) & 0xFF;
+                break;
+            }
+            case Format::D24S8:
+            {
+                Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
+                color.r() = data.x & 0xFF;
+                color.g() = (data.x >> 8) & 0xFF;
+                color.b() = (data.x >> 16) & 0xFF;
+                break;
+            }
             default:
                 qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
                 break;
@@ -240,3 +279,19 @@ void GraphicsFramebufferWidget::OnUpdate()
     framebuffer_format_control->setCurrentIndex(static_cast<int>(framebuffer_format));
     framebuffer_picture_label->setPixmap(pixmap);
 }
+
+u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
+    switch (format) {
+        case Format::RGBA8:
+        case Format::D24S8:
+            return 4;
+        case Format::RGB8:
+        case Format::D24:
+            return 3;
+        case Format::RGB5A1:
+        case Format::RGB565:
+        case Format::RGBA4:
+        case Format::D16:
+            return 2;
+    }
+}
\ No newline at end of file
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index 15ebd1f7d..dff91d131 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -21,7 +21,8 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
 
     enum class Source {
         PicaTarget = 0,
-        Custom = 1,
+        DepthBuffer = 1,
+        Custom = 2,
 
         // TODO: Add GPU framebuffer sources!
     };
@@ -32,8 +33,13 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
         RGB5A1   = 2,
         RGB565   = 3,
         RGBA4    = 4,
+        D16      = 5,
+        D24      = 6,
+        D24S8    = 7
     };
 
+    static u32 BytesPerPixel(Format format);
+
 public:
     GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);
 
-- 
cgit v1.2.3


From 1248e291f0c9a29734b0f5175df8fa675cce930c Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 8 Mar 2015 21:31:15 -0500
Subject: GPU: Added the stencil test structure to the Pica Regs struct.

---
 src/citra_qt/debugger/graphics_framebuffer.cpp |  6 +--
 src/citra_qt/debugger/graphics_framebuffer.h   |  6 +--
 src/core/hw/gpu.cpp                            |  4 +-
 src/core/hw/gpu.h                              |  6 +--
 src/video_core/color.h                         | 18 ++++-----
 src/video_core/pica.h                          | 43 +++++++++++++++-----
 src/video_core/rasterizer.cpp                  | 54 +++++++++++---------------
 7 files changed, 76 insertions(+), 61 deletions(-)

(limited to 'src')

diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 2985cadb1..d621d7204 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -176,7 +176,7 @@ void GraphicsFramebufferWidget::OnUpdate()
     {
         // TODO: Store a reference to the registers in the debug context instead of accessing them directly...
 
-        auto framebuffer = Pica::registers.framebuffer;
+        const auto& framebuffer = Pica::registers.framebuffer;
 
         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
@@ -189,7 +189,7 @@ void GraphicsFramebufferWidget::OnUpdate()
 
     case Source::DepthBuffer:
     {
-        auto framebuffer = Pica::registers.framebuffer;
+        const auto& framebuffer = Pica::registers.framebuffer;
 
         framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
@@ -294,4 +294,4 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
         case Format::D16:
             return 2;
     }
-}
\ No newline at end of file
+}
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index dff91d131..4cb396ffe 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -20,9 +20,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
     using Event = Pica::DebugContext::Event;
 
     enum class Source {
-        PicaTarget = 0,
-        DepthBuffer = 1,
-        Custom = 2,
+        PicaTarget   = 0,
+        DepthBuffer  = 1,
+        Custom       = 2,
 
         // TODO: Add GPU framebuffer sources!
     };
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index b7102b874..424ce2ca7 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
             if (config.fill_24bit) {
                 // fill with 24-bit values
                 for (u8* ptr = start; ptr < end; ptr += 3) {
-                    ptr[0] = config.value_24bit_r;
+                    ptr[0] = config.value_24bit_b;
                     ptr[1] = config.value_24bit_g;
-                    ptr[2] = config.value_24bit_b;
+                    ptr[2] = config.value_24bit_r;
                 }
             } else if (config.fill_32bit) {
                 // fill with 32-bit values
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 5ca4a5450..737b1e968 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -100,10 +100,10 @@ struct Regs {
             // Set to 1 upon completion.
             BitField<0, 1, u32> finished;
 
-            // If both of these bits are unset, then it will fill the memory with a 16 bit value
-            // 1: fill with 24-bit wide values
+            // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
             BitField<8, 1, u32> fill_24bit;
-            // 1: fill with 32-bit wide values
+
+            // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
             BitField<9, 1, u32> fill_32bit;
         };
 
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35b56efc0..14ade74f2 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -105,7 +105,7 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
  * @param bytes Pointer to encoded source value
  * @return Depth value as an u32
  */
-inline const u32 DecodeD16(const u8* bytes) {
+inline u32 DecodeD16(const u8* bytes) {
     return *reinterpret_cast<const u16_le*>(bytes);
 }
 
@@ -114,7 +114,7 @@ inline const u32 DecodeD16(const u8* bytes) {
  * @param bytes Pointer to encoded source value
  * @return Depth value as an u32
  */
-inline const u32 DecodeD24(const u8* bytes) {
+inline u32 DecodeD24(const u8* bytes) {
     return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
 }
 
@@ -181,8 +181,8 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
 }
 
 /**
- * Encode a depth value as D16 format
- * @param value Source depth value to encode
+ * Encode a 16 bit depth value as D16 format
+ * @param value 16 bit source depth value to encode
  * @param bytes Pointer where to store the encoded value
  */
 inline void EncodeD16(u32 value, u8* bytes) {
@@ -190,8 +190,8 @@ inline void EncodeD16(u32 value, u8* bytes) {
 }
 
 /**
- * Encode a depth value as D24 format
- * @param value Source depth value to encode
+ * Encode a 24 bit depth value as D24 format
+ * @param value 24 bit source depth value to encode
  * @param bytes Pointer where to store the encoded value
  */
 inline void EncodeD24(u32 value, u8* bytes) {
@@ -201,9 +201,9 @@ inline void EncodeD24(u32 value, u8* bytes) {
 }
 
 /**
- * Encode depth and stencil values as D24S8 format
- * @param depth Source depth values to encode
- * @param stencil Source stencil value to encode
+ * Encode a 24 bit depth value and an 8 bit stencil value as D24S8 format
+ * @param depth 24 bit source depth value to encode
+ * @param stencil 8 bit source stencil value to encode
  * @param bytes Pointer where to store the encoded value
  */
 inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 6549693f5..fe20cd77d 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -393,7 +393,15 @@ struct Regs {
             BitField< 8, 8, u32> ref;
         } alpha_test;
 
-        INSERT_PADDING_WORDS(0x2);
+        union {
+            BitField< 0, 1, u32> stencil_test_enable;
+            BitField< 4, 3, CompareFunc> stencil_test_func;
+            BitField< 8, 8, u32> stencil_replacement_value;
+            BitField<16, 8, u32> stencil_reference_value;
+            BitField<24, 8, u32> stencil_mask;
+        } stencil_test;
+
+        INSERT_PADDING_WORDS(0x1);
 
         union {
             BitField< 0, 1, u32> depth_test_enable;
@@ -408,6 +416,30 @@ struct Regs {
         INSERT_PADDING_WORDS(0x8);
     } output_merger;
 
+    enum DepthFormat : u32 {
+        D16    = 0,
+
+        D24    = 2,
+        D24S8  = 3
+    };
+
+    // Returns the number of bytes per pixel of the specified depth format.
+    // Unknown formats are logged and reported as 0 bytes, so every path returns a defined value.
+    static u32 BytesPerDepthPixel(DepthFormat format) {
+        switch (format) {
+        case DepthFormat::D16:
+            return 2;
+        case DepthFormat::D24:
+            return 3;
+        case DepthFormat::D24S8:
+            return 4;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
+            UNIMPLEMENTED();
+            return 0;
+        }
+    }
+
     struct {
         // Components are laid out in reverse byte order, most significant bits first.
         enum ColorFormat : u32 {
@@ -418,16 +450,9 @@ struct Regs {
             RGBA4    = 4,
         };
 
-        enum DepthFormat : u32 {
-            D16     = 0,
-        
-            D24     = 2,
-            D24S8   = 3
-        };
-
         INSERT_PADDING_WORDS(0x6);
 
-        u32 depth_format;
+        DepthFormat depth_format;
         BitField<16, 3, u32> color_format;
 
         INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index dc32128c6..dd46f0ec3 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -100,24 +100,19 @@ static u32 GetDepth(int x, int y) {
     y = (registers.framebuffer.height - y);
     
     const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+    u32 stride = registers.framebuffer.width * bytes_per_pixel;
+
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
 
     switch (registers.framebuffer.depth_format) {
-        case registers.framebuffer.D16:
-        {
-            u32 stride = registers.framebuffer.width * 2;
-            return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
-        }
-        case registers.framebuffer.D24:
-        {
-            u32 stride = registers.framebuffer.width * 3;
-            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
-            return Color::DecodeD24(address);
-        }
-        case registers.framebuffer.D24S8:
-        {
-            u32 stride = registers.framebuffer.width * 4;
-            return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
-        }
+        case Pica::Regs::DepthFormat::D16:
+            return Color::DecodeD16(src_pixel);
+        case Pica::Regs::DepthFormat::D24:
+            return Color::DecodeD24(src_pixel);
+        case Pica::Regs::DepthFormat::D24S8:
+            return Color::DecodeD24S8(src_pixel).x;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
             UNIMPLEMENTED();
@@ -132,28 +127,23 @@ static void SetDepth(int x, int y, u32 value) {
     y = (registers.framebuffer.height - y);
 
     const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+    u32 stride = registers.framebuffer.width * bytes_per_pixel;
+
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* dst_pixel = depth_buffer + dst_offset;
 
     switch (registers.framebuffer.depth_format) {
-        case registers.framebuffer.D16:
-        {
-            u32 stride = registers.framebuffer.width * 2;
-            Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+        case Pica::Regs::DepthFormat::D16:
+            Color::EncodeD16(value, dst_pixel);
             break;
-        }
-        case registers.framebuffer.D24:
-        {
-            u32 stride = registers.framebuffer.width * 3;
-            u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
-            Color::EncodeD24(value, address);
+        case Pica::Regs::DepthFormat::D24:
+            Color::EncodeD24(value, dst_pixel);
             break;
-        }
-        case registers.framebuffer.D24S8:
-        {
-            u32 stride = registers.framebuffer.width * 4;
+        case Pica::Regs::DepthFormat::D24S8:
             // TODO(Subv): Implement the stencil buffer
-            Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
+            Color::EncodeD24S8(value, 0, dst_pixel);
             break;
-        }
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
             UNIMPLEMENTED();
-- 
cgit v1.2.3