diff options
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r-- | src/video_core/rasterizer.cpp | 202 |
1 files changed, 117 insertions, 85 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a35f0c0d8..a80148872 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> @@ -18,7 +18,7 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { - u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); + u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); // Assuming RGBA8 format until actual framebuffer format handling is implemented @@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { } static u32 GetDepth(int x, int y) { - u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); + u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); // Assuming 16-bit depth buffer format until actual format handling is implemented return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); } static void SetDepth(int x, int y, u16 value) { - u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); + u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); // Assuming 16-bit depth buffer format until actual format handling is implemented *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; @@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; + auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); + + auto textures = registers.GetTextures(); + auto tev_stages = registers.GetTevStages(); + // TODO: Not sure if looping through x first might be faster for (u16 y = min_y; y < max_y; y += 0x10) { for (u16 x = min_x; x < max_x; x += 0x10) { @@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, if (w0 < 0 || w1 < 0 || w2 < 0) continue; + auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), + float24::FromFloat32(static_cast<float>(w1)), + float24::FromFloat32(static_cast<float>(w2))); + float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); + // Perspective correct attribute interpolation: // Attribute values cannot be calculated by simple linear interpolation since // they are not linear in screen space. For example, when interpolating a @@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // // The generalization to three vertices is straightforward in baricentric coordinates. auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { - auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, - attr1 / v1.pos.w, - attr2 / v2.pos.w); - auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, - float24::FromFloat32(1.f) / v1.pos.w, - float24::FromFloat32(1.f) / v2.pos.w); - auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), - float24::FromFloat32(static_cast<float>(w1)), - float24::FromFloat32(static_cast<float>(w2))); - + auto attr_over_w = Math::MakeVec(attr0, attr1, attr2); float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); - float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); - return interpolated_attr_over_w / interpolated_w_inverse; + return interpolated_attr_over_w * interpolated_w_inverse; }; Math::Vec4<u8> primary_color{ @@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; - Math::Vec4<u8> texture_color{}; - float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); - float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); - if (registers.texturing_enable) { - // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each - // of which is composed of four 2x2 subtiles each of which is composed of four texels. - // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. - // texels are laid out in a 2x2 subtile like this: - // 2 3 - // 0 1 - // - // The full 8x8 tile has the texels arranged like this: - // - // 42 43 46 47 58 59 62 63 - // 40 41 44 45 56 57 60 61 - // 34 35 38 39 50 51 54 55 - // 32 33 36 37 48 49 52 53 - // 10 11 14 15 26 27 30 31 - // 08 09 12 13 24 25 28 29 - // 02 03 06 07 18 19 22 23 - // 00 01 04 05 16 17 20 21 - - // TODO: This is currently hardcoded for RGB8 - u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); - - // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. - // To be flexible in case different but similar patterns are used, we keep this - // somewhat inefficient code around for now. - int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32(); - int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32(); - int texel_index_within_tile = 0; - for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { - int sub_tile_width = 1 << block_size_index; - int sub_tile_height = 1 << block_size_index; - - int sub_tile_index = (s & sub_tile_width) << block_size_index; - sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); - texel_index_within_tile += sub_tile_index; - } - - const int block_width = 8; - const int block_height = 8; - - int coarse_s = (s / block_width) * block_width; - int coarse_t = (t / block_height) * block_height; - - const int row_stride = registers.texture0.width * 3; - u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; - texture_color.r() = source_ptr[2]; - texture_color.g() = source_ptr[1]; - texture_color.b() = source_ptr[0]; - texture_color.a() = 0xFF; - - DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); + Math::Vec2<float24> uv[3]; + uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); + uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); + uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u()); + uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v()); + uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); + uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); + + Math::Vec4<u8> texture_color[3]{}; + for (int i = 0; i < 3; ++i) { + const auto& texture = textures[i]; + if (!texture.enabled) + continue; + + _dbg_assert_(HW_GPU, 0 != texture.config.address); + + int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); + int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); + auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { + switch (mode) { + case Regs::TextureConfig::ClampToEdge: + val = std::max(val, 0); + val = std::min(val, (int)size - 1); + return val; + + case Regs::TextureConfig::Repeat: + return (int)(((unsigned)val) % size); + + default: + LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); + _dbg_assert_(HW_GPU, 0); + return 0; + } + }; + s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); + t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); + + u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); + auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + + texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); + DebugUtils::DumpTexture(texture.config, texture_data); } // Texture environment - consists of 6 stages of color and alpha combining. @@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // with some basic arithmetic. Alpha combiners can be configured separately but work // analogously. Math::Vec4<u8> combiner_output; - for (auto tev_stage : registers.GetTevStages()) { + for (const auto& tev_stage : tev_stages) { using Source = Regs::TevStageConfig::Source; using ColorModifier = Regs::TevStageConfig::ColorModifier; using AlphaModifier = Regs::TevStageConfig::AlphaModifier; using Operation = Regs::TevStageConfig::Operation; - auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { + auto GetColorSource = [&](Source source) -> Math::Vec4<u8> { switch (source) { case Source::PrimaryColor: - return primary_color.rgb(); + return primary_color; case Source::Texture0: - return texture_color.rgb(); + return texture_color[0]; + + case Source::Texture1: + return texture_color[1]; + + case Source::Texture2: + return texture_color[2]; case Source::Constant: - return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; case Source::Previous: - return combiner_output.rgb(); + return combiner_output; default: - ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); + LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return primary_color.a(); case Source::Texture0: - return texture_color.a(); + return texture_color[0].a(); + + case Source::Texture1: + return texture_color[1].a(); + + case Source::Texture2: + return texture_color[2].a(); case Source::Constant: return tev_stage.const_a; @@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return combiner_output.a(); default: - ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); + LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source); + _dbg_assert_(HW_GPU, 0); return 0; } }; - auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { + auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { switch (factor) { case ColorModifier::SourceColor: - return values; + return values.rgb(); + + case ColorModifier::SourceAlpha: + return { values.a(), values.a(), values.a() }; + default: - ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, switch (factor) { case AlphaModifier::SourceAlpha: return value; + + case AlphaModifier::OneMinusSourceAlpha: + return 255 - value; + default: - ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor); + _dbg_assert_(HW_GPU, 0); return 0; } }; @@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Modulate: return ((input[0] * input[1]) / 255).Cast<u8>(); + case Operation::Add: + { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast<u8>(); + } + + case Operation::Lerp: + return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); + default: - ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); + LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Modulate: return input[0] * input[1] / 255; + case Operation::Add: + return std::min(255, input[0] + input[1]); + + case Operation::Lerp: + return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + default: - ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); + LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); + _dbg_assert_(HW_GPU, 0); return 0; } }; |