From 28e97ec962ec98fa4e13ab95ba1862cd80adbc6c Mon Sep 17 00:00:00 2001
From: pineappleEA
Date: Sat, 30 Dec 2023 11:00:24 +0100
Subject: [PATCH] early-access version 4039

---
 README.md                                      |  2 +-
 src/common/heap_tracker.cpp                    | 36 ++++++++++++++-----
 src/common/heap_tracker.h                      |  1 +
 .../backend/glsl/emit_glsl_image.cpp           | 12 +++----
 .../backend/glsl/emit_glsl_instructions.h      |  2 +-
 .../backend/spirv/spirv_emit_context.cpp       |  2 +-
 src/video_core/renderer_opengl/gl_device.cpp   | 11 +++---
 src/yuzu/configuration/shared_translation.cpp  |  2 +-
 8 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 18e4ef637..069e76f9a 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 4037.
+This is the source code for early-access 4039.
 
 ## Legal Notice
 
diff --git a/src/common/heap_tracker.cpp b/src/common/heap_tracker.cpp
index 95dc8aa1e..683208795 100755
--- a/src/common/heap_tracker.cpp
+++ b/src/common/heap_tracker.cpp
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include 
+#include <fstream>
 
 #include "common/heap_tracker.h"
 
@@ -11,11 +11,25 @@ namespace Common {
 
 namespace {
 
-constexpr s64 MaxResidentMapCount = 0x8000;
+s64 GetMaxPermissibleResidentMapCount() {
+    // Default value.
+    s64 value = 65530;
+
+    // Try to read how many mappings we can make.
+    std::ifstream s("/proc/sys/vm/max_map_count");
+    s >> value;
+
+    // Print, for debug.
+    LOG_INFO(HW_Memory, "Current maximum map count: {}", value);
+
+    // Allow 20000 maps for other code and to account for split inaccuracy.
+    return std::max<s64>(value - 20000, 0);
+}
 
 } // namespace
 
-HeapTracker::HeapTracker(Common::HostMemory& buffer) : m_buffer(buffer) {}
+HeapTracker::HeapTracker(Common::HostMemory& buffer)
+    : m_buffer(buffer), m_max_resident_map_count(GetMaxPermissibleResidentMapCount()) {}
 HeapTracker::~HeapTracker() = default;
 
 void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,
@@ -74,8 +88,8 @@ void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_hea
         }
 
         // Erase from map.
-        it = m_mappings.erase(it);
         ASSERT(--m_map_count >= 0);
+        it = m_mappings.erase(it);
 
         // Free the item.
         delete item;
@@ -94,8 +108,8 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p
     this->SplitHeapMap(virtual_offset, size);
 
     // Declare tracking variables.
+    const VAddr end = virtual_offset + size;
     VAddr cur = virtual_offset;
-    VAddr end = virtual_offset + size;
 
     while (cur < end) {
         VAddr next = cur;
@@ -167,7 +181,7 @@ bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
         it->tick = m_tick++;
 
         // Check if we need to rebuild.
-        if (m_resident_map_count > MaxResidentMapCount) {
+        if (m_resident_map_count > m_max_resident_map_count) {
            rebuild_required = true;
         }
 
@@ -193,8 +207,12 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() {
 
     ASSERT(!m_resident_mappings.empty());
 
-    // Unmap so we have at least 4 maps available.
-    const size_t desired_count = std::min(m_resident_map_count, MaxResidentMapCount - 4);
+    // Dump half of the mappings.
+    //
+    // Despite being worse in theory, this has proven to be better in practice than more
+    // regularly dumping a smaller amount, because it significantly reduces average case
+    // lock contention.
+    const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
     const size_t evict_count = m_resident_map_count - desired_count;
     auto it = m_resident_mappings.begin();
 
@@ -247,8 +265,8 @@ void HeapTracker::SplitHeapMapLocked(VAddr offset) {
 
     // If resident, also insert into resident map.
     if (right->is_resident) {
-        m_resident_mappings.insert(*right);
         m_resident_map_count++;
+        m_resident_mappings.insert(*right);
     }
 }
 
diff --git a/src/common/heap_tracker.h b/src/common/heap_tracker.h
index cc16041d9..ee5b0bf43 100755
--- a/src/common/heap_tracker.h
+++ b/src/common/heap_tracker.h
@@ -86,6 +86,7 @@ private:
 
 private:
     Common::HostMemory& m_buffer;
+    const s64 m_max_resident_map_count;
 
     std::shared_mutex m_rebuild_lock{};
     std::mutex m_lock{};
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 2d7bb6c10..0c05be68b 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -449,7 +449,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 }
 
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                     std::string_view ms) {
     const auto info{inst.Flags()};
     if (info.has_bias) {
@@ -470,9 +470,9 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const auto int_coords{CoordsCastToInt(coords, info)};
     if (!ms.empty()) {
         ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
-    } else if (!offset.empty()) {
+    } else if (!offset.IsEmpty()) {
         ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
-                CoordsCastToInt(offset, info));
+                GetOffsetVec(ctx, offset));
     } else {
         if (info.type == TextureType::Buffer) {
             ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
@@ -485,10 +485,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (!ms.empty()) {
         throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
     }
-    if (!offset.empty()) {
+    if (!offset.IsEmpty()) {
         ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
-                  *sparse_inst, texture, CastToIntVec(coords, info), lod,
-                  CastToIntVec(offset, info), texel);
+                  *sparse_inst, texture, CastToIntVec(coords, info), lod, GetOffsetVec(ctx, offset),
+                  texel);
     } else {
         ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
                   *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index f8c862a32..fad4c8a58 100755
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -651,7 +651,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
                          std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
                          std::string_view dref);
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                     std::string_view ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                               std::string_view lod, const IR::Value& skip_mips);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index d278f70a2..164095b5d 100755
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1440,7 +1440,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         if (profile.support_vertex_instance_id) {
             instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
             if (loads[IR::Attribute::BaseInstance]) {
-                base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+                base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
             }
         } else {
             instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3d13c695f..9f9469049 100755
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -195,9 +195,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
     has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
     has_astc = !has_slow_software_astc && IsASTCSupported();
     has_variable_aoffi = TestVariableAoffi();
-    has_component_indexing_bug = is_amd;
+    has_component_indexing_bug = false;
     has_precise_bug = TestPreciseBug();
-    has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
+    has_broken_texture_view_formats = (!is_linux && is_intel);
     has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
     has_derivative_control = GLAD_GL_ARB_derivative_control;
     has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
@@ -238,10 +238,11 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
     has_lmem_perf_bug = is_nvidia;
 
     strict_context_required = emu_window.StrictContextRequired();
-    // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
+    // Blocks Intel OpenGL drivers on Windows from using asynchronous shader compilation.
     // Blocks EGL on Wayland from using asynchronous shader compilation.
-    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
-                               !(is_amd || (is_intel && !is_linux)) && !strict_context_required;
+    const bool blacklist_async_shaders = (is_intel && !is_linux) || strict_context_required;
+    use_asynchronous_shaders =
+        Settings::values.use_asynchronous_shaders.GetValue() && !blacklist_async_shaders;
     use_driver_cache = is_nvidia;
     supports_conditional_barriers = !is_intel;
 
diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp
index 7e908924c..922eb1b1a 100755
--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -228,7 +228,7 @@ std::unique_ptr ComboboxEnumeration(QWidget* parent) {
                           {
                               PAIR(ShaderBackend, Glsl, tr("GLSL")),
                               PAIR(ShaderBackend, Glasm, tr("GLASM (Assembly Shaders, NVIDIA Only)")),
-                              PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, Mesa Only)")),
+                              PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, AMD/Mesa Only)")),
                           }});
     translations->insert({Settings::EnumMetadata::Index(),
                           {
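
As a companion to the heap_tracker.cpp hunks above, which size the resident-map budget from
/proc/sys/vm/max_map_count: the following is a minimal standalone sketch of that probe, assuming
a Linux host. The 65530 fallback and the 20000-map headroom mirror the patch, while the main()
driver and iostream output are illustrative stand-ins for yuzu's LOG_INFO.

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>

using s64 = std::int64_t;

// Read the kernel's per-process mapping limit and keep headroom, as the patch's
// GetMaxPermissibleResidentMapCount() does.
s64 GetMaxPermissibleResidentMapCount() {
    // Fallback matches the usual Linux default when the sysctl cannot be read.
    s64 value = 65530;

    // /proc/sys/vm/max_map_count holds the per-process mmap limit.
    std::ifstream s("/proc/sys/vm/max_map_count");
    s >> value;

    // Reserve 20000 maps for the rest of the process and for split inaccuracy.
    return std::max<s64>(value - 20000, 0);
}

int main() {
    // Illustrative driver only; the real code feeds this value into HeapTracker.
    std::cout << "usable resident map budget: " << GetMaxPermissibleResidentMapCount() << '\n';
}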