summaryrefslogtreecommitdiffstats
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r--src/video_core/rasterizer.cpp202
1 files changed, 117 insertions, 85 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a35f0c0d8..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -1,5 +1,5 @@
// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
+// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
@@ -18,7 +18,7 @@ namespace Pica {
namespace Rasterizer {
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
- u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());
+ u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
// Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
}
static u32 GetDepth(int x, int y) {
- u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+ u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
// Assuming 16-bit depth buffer format until actual format handling is implemented
return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
}
static void SetDepth(int x, int y, u16 value) {
- u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+ u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
// Assuming 16-bit depth buffer format until actual format handling is implemented
*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
+ auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
+
+ auto textures = registers.GetTextures();
+ auto tev_stages = registers.GetTevStages();
+
// TODO: Not sure if looping through x first might be faster
for (u16 y = min_y; y < max_y; y += 0x10) {
for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
if (w0 < 0 || w1 < 0 || w2 < 0)
continue;
+ auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
+ float24::FromFloat32(static_cast<float>(w1)),
+ float24::FromFloat32(static_cast<float>(w2)));
+ float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
+
// Perspective correct attribute interpolation:
// Attribute values cannot be calculated by simple linear interpolation since
// they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
//
// The generalization to three vertices is straightforward in barycentric coordinates.
auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
- auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
- attr1 / v1.pos.w,
- attr2 / v2.pos.w);
- auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
- float24::FromFloat32(1.f) / v1.pos.w,
- float24::FromFloat32(1.f) / v2.pos.w);
- auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
- float24::FromFloat32(static_cast<float>(w1)),
- float24::FromFloat32(static_cast<float>(w2)));
-
+ auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
- float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
- return interpolated_attr_over_w / interpolated_w_inverse;
+ return interpolated_attr_over_w * interpolated_w_inverse;
};
Math::Vec4<u8> primary_color{
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
};
- Math::Vec4<u8> texture_color{};
- float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
- float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
- if (registers.texturing_enable) {
- // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
- // of which is composed of four 2x2 subtiles each of which is composed of four texels.
- // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
- // texels are laid out in a 2x2 subtile like this:
- // 2 3
- // 0 1
- //
- // The full 8x8 tile has the texels arranged like this:
- //
- // 42 43 46 47 58 59 62 63
- // 40 41 44 45 56 57 60 61
- // 34 35 38 39 50 51 54 55
- // 32 33 36 37 48 49 52 53
- // 10 11 14 15 26 27 30 31
- // 08 09 12 13 24 25 28 29
- // 02 03 06 07 18 19 22 23
- // 00 01 04 05 16 17 20 21
-
- // TODO: This is currently hardcoded for RGB8
- u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
-
- // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
- // To be flexible in case different but similar patterns are used, we keep this
- // somewhat inefficient code around for now.
- int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32();
- int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32();
- int texel_index_within_tile = 0;
- for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
- int sub_tile_width = 1 << block_size_index;
- int sub_tile_height = 1 << block_size_index;
-
- int sub_tile_index = (s & sub_tile_width) << block_size_index;
- sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
- texel_index_within_tile += sub_tile_index;
- }
-
- const int block_width = 8;
- const int block_height = 8;
-
- int coarse_s = (s / block_width) * block_width;
- int coarse_t = (t / block_height) * block_height;
-
- const int row_stride = registers.texture0.width * 3;
- u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
- texture_color.r() = source_ptr[2];
- texture_color.g() = source_ptr[1];
- texture_color.b() = source_ptr[0];
- texture_color.a() = 0xFF;
-
- DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
+ Math::Vec2<float24> uv[3];
+ uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
+ uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
+ uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
+ uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
+ uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
+ uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
+
+ Math::Vec4<u8> texture_color[3]{};
+ for (int i = 0; i < 3; ++i) {
+ const auto& texture = textures[i];
+ if (!texture.enabled)
+ continue;
+
+ _dbg_assert_(HW_GPU, 0 != texture.config.address);
+
+ int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
+ int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
+ auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
+ switch (mode) {
+ case Regs::TextureConfig::ClampToEdge:
+ val = std::max(val, 0);
+ val = std::min(val, (int)size - 1);
+ return val;
+
+ case Regs::TextureConfig::Repeat:
+ return (int)(((unsigned)val) % size);
+
+ default:
+ LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
+ _dbg_assert_(HW_GPU, 0);
+ return 0;
+ }
+ };
+ s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
+ t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
+
+ u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
+ auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
+
+ texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
+ DebugUtils::DumpTexture(texture.config, texture_data);
}
// Texture environment - consists of 6 stages of color and alpha combining.
@@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
// with some basic arithmetic. Alpha combiners can be configured separately but work
// analogously.
Math::Vec4<u8> combiner_output;
- for (auto tev_stage : registers.GetTevStages()) {
+ for (const auto& tev_stage : tev_stages) {
using Source = Regs::TevStageConfig::Source;
using ColorModifier = Regs::TevStageConfig::ColorModifier;
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
using Operation = Regs::TevStageConfig::Operation;
- auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
+ auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
switch (source) {
case Source::PrimaryColor:
- return primary_color.rgb();
+ return primary_color;
case Source::Texture0:
- return texture_color.rgb();
+ return texture_color[0];
+
+ case Source::Texture1:
+ return texture_color[1];
+
+ case Source::Texture2:
+ return texture_color[2];
case Source::Constant:
- return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
+ return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
case Source::Previous:
- return combiner_output.rgb();
+ return combiner_output;
default:
- ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
+ LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
+ _dbg_assert_(HW_GPU, 0);
return {};
}
};
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
return primary_color.a();
case Source::Texture0:
- return texture_color.a();
+ return texture_color[0].a();
+
+ case Source::Texture1:
+ return texture_color[1].a();
+
+ case Source::Texture2:
+ return texture_color[2].a();
case Source::Constant:
return tev_stage.const_a;
@@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
return combiner_output.a();
default:
- ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
+ LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
+ _dbg_assert_(HW_GPU, 0);
return 0;
}
};
- auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
+ auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
switch (factor)
{
case ColorModifier::SourceColor:
- return values;
+ return values.rgb();
+
+ case ColorModifier::SourceAlpha:
+ return { values.a(), values.a(), values.a() };
+
default:
- ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+ LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
+ _dbg_assert_(HW_GPU, 0);
return {};
}
};
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
switch (factor) {
case AlphaModifier::SourceAlpha:
return value;
+
+ case AlphaModifier::OneMinusSourceAlpha:
+ return 255 - value;
+
default:
- ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+ LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
+ _dbg_assert_(HW_GPU, 0);
return 0;
}
};
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
case Operation::Modulate:
return ((input[0] * input[1]) / 255).Cast<u8>();
+ case Operation::Add:
+ {
+ auto result = input[0] + input[1];
+ result.r() = std::min(255, result.r());
+ result.g() = std::min(255, result.g());
+ result.b() = std::min(255, result.b());
+ return result.Cast<u8>();
+ }
+
+ case Operation::Lerp:
+ return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
+
default:
- ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
+ LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
+ _dbg_assert_(HW_GPU, 0);
return {};
}
};
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
case Operation::Modulate:
return input[0] * input[1] / 255;
+ case Operation::Add:
+ return std::min(255, input[0] + input[1]);
+
+ case Operation::Lerp:
+ return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
+
default:
- ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
+ LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
+ _dbg_assert_(HW_GPU, 0);
return 0;
}
};