From fb13a8600c7df4707fcfcf041c5f7f00ad67583d Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 20 Nov 2021 07:23:22 +0100 Subject: [PATCH] early-access version 2226 --- README.md | 2 +- .../host_shaders/convert_abgr8_to_d24s8.frag | 4 +- .../convert_b10g11r11_to_d24s8.frag | 4 +- .../host_shaders/convert_r16g16_to_d24s8.frag | 4 +- .../renderer_opengl/gl_texture_cache.cpp | 4 +- .../renderer_opengl/gl_texture_cache.h | 7 +- .../renderer_vulkan/vk_texture_cache.cpp | 174 ++++++++++++++++++ .../renderer_vulkan/vk_texture_cache.h | 11 +- src/video_core/texture_cache/texture_cache.h | 4 +- .../texture_cache/texture_cache_base.h | 2 - 10 files changed, 200 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index f86f31b11..7b61f1001 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2225. +This is the source code for early-access 2226. ## Legal Notice diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index f7657e50a..4e4ab6a26 100755 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -13,5 +13,5 @@ void main() { uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(color.a); + gl_FragStencilRefARB = int(color.a); } diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag index b7358c15c..2999a84cf 100755 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -15,5 +15,5 @@ void main() { | (uint(color.r * (exp2(11) - 1.0f))); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag index 7b1b914f6..3df70575e 100755 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -14,5 +14,5 @@ void main() { | (uint(color.g * (exp2(16) - 1.0f)) << 16); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3dbdcc2c4..406e8086d 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -527,8 +527,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } -void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, - std::span copies) { +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span copies) { LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); format_conversion_pass.ConvertImage(dst, src, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c0534b1f1..37d5e6a6b 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,9 +84,13 @@ public: u64 GetDeviceLocalMemory() const; + bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { + return true; + } + void CopyImage(Image& dst, Image& src, std::span copies); - void ConvertImage(Image& dst, Image& src, std::span copies); + void ReinterpretImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); @@ -339,7 +343,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f99d1d647..e5637db05 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -313,6 +313,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } +[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, + VkImageAspectFlags aspect_mask) noexcept { + return VkBufferImageCopy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = MakeImageSubresourceLayers( + is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), + .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), + .imageExtent = MakeExtent3D(copy.extent), + }; +} + [[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); @@ -759,6 +772,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Download); } +bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { + if (VideoCore::Surface::GetFormatType(dst.info.format) == + VideoCore::Surface::SurfaceType::DepthStencil) { + return !device.IsExtShaderStencilExportSupported(); + } + return false; +} + +[[nodiscard]] size_t NextPow2(size_t value) { + return static_cast(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); +} + +VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { + const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); + if (buffer_commits[level]) { + return *buffers[level]; + } + const auto new_size = NextPow2(needed_size); + VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + buffers[level] = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = new_size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + buffer_commits[level] = std::make_unique( + memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); + return *buffers[level]; +} + +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span copies) { + std::vector vk_in_copies(copies.size()); + std::vector vk_out_copies(copies.size()); + const VkImageAspectFlags src_aspect_mask = src.AspectMask(); + const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); + + std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, true, src_aspect_mask); + }); + std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, false, dst_aspect_mask); + }); + const u32 img_bpp = BytesPerBlock(src.info.format); + size_t total_size = 0; + for (const auto& copy : copies) { + total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; + } + const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, + vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkBufferImageCopy& copy : vk_in_copies) { + src_range.AddLayers(copy.imageSubresource); + } + for (const VkBufferImageCopy& copy : vk_out_copies) { + dst_range.AddLayers(copy.imageSubresource); + } + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_in_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_out_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, pre_barriers); + + cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, + vk_in_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, WRITE_BARRIER, nullptr, middle_in_barrier); + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, READ_BARRIER, {}, middle_out_barrier); + cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, {}, {}, post_barriers); + }); +} + void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74..44e9dcee4 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,6 +61,10 @@ public: void CopyImage(Image& dst, Image& src, std::span copies); + bool ShouldReinterpret(Image& dst, Image& src); + + void ReinterpretImage(Image& dst, Image& src, std::span copies); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); bool CanAccelerateImageUpload(Image&) const noexcept { @@ -82,6 +86,8 @@ public: return true; } + [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); + const Device& device; VKScheduler& scheduler; MemoryAllocator& memory_allocator; @@ -90,6 +96,10 @@ public: ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; const Settings::ResolutionScalingInfo& resolution; + + constexpr static size_t indexing_slots = 8 * sizeof(size_t); + std::array buffers{}; + std::array, indexing_slots> buffer_commits{}; }; class Image : public VideoCommon::ImageBase { @@ -316,7 +326,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4188f93c5..44a0d42ba 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1762,8 +1762,8 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector