early-access version 4142

This commit is contained in:
pineappleEA 2024-02-16 03:52:15 +01:00
parent 40d2b57dc0
commit 7a77957f8b
8 changed files with 67 additions and 15 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 4141. This is the source code for early-access 4142.
## Legal Notice ## Legal Notice

View file

@ -60,11 +60,10 @@ public:
Add(spv::ImageOperandsMask::ConstOffsets, offsets); Add(spv::ImageOperandsMask::ConstOffsets, offsets);
} }
explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, Id lod, Id ms) { explicit ImageOperands(Id lod, Id ms) {
if (Sirit::ValidId(lod)) { if (Sirit::ValidId(lod)) {
Add(spv::ImageOperandsMask::Lod, lod); Add(spv::ImageOperandsMask::Lod, lod);
} }
AddOffset(ctx, offset, ImageFetchOffsetAllowed);
if (Sirit::ValidId(ms)) { if (Sirit::ValidId(ms)) {
Add(spv::ImageOperandsMask::Sample, ms); Add(spv::ImageOperandsMask::Sample, ms);
} }
@ -312,6 +311,43 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info,
return coords; return coords;
} }
} }
void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords,
Id offset) {
if (!Sirit::ValidId(offset)) {
return;
}
Id result_type{};
switch (info.type) {
case TextureType::Buffer:
case TextureType::Color1D: {
result_type = ctx.U32[1];
break;
}
case TextureType::ColorArray1D:
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
[[fallthrough]];
case TextureType::Color2D:
case TextureType::Color2DRect: {
result_type = ctx.U32[2];
break;
}
case TextureType::ColorArray2D:
offset = ctx.OpCompositeConstruct(ctx.U32[3], ctx.OpCompositeExtract(ctx.U32[1], coords, 0),
ctx.OpCompositeExtract(ctx.U32[1], coords, 1),
ctx.u32_zero_value);
[[fallthrough]];
case TextureType::Color3D: {
result_type = ctx.U32[3];
break;
}
case TextureType::ColorCube:
case TextureType::ColorArrayCube:
return;
}
coords = ctx.OpIAdd(result_type, coords, offset);
}
} // Anonymous namespace } // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) { Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@ -494,9 +530,10 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
operands.Span()); operands.Span());
} }
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
const IR::Value& offset, Id lod, Id ms) { Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()}; const auto info{inst->Flags<IR::TextureInstInfo>()};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) { if (info.type == TextureType::Buffer) {
lod = Id{}; lod = Id{};
} }
@ -504,7 +541,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
// This image is multisampled, lod must be implicit // This image is multisampled, lod must be implicit
lod = Id{}; lod = Id{};
} }
const ImageOperands operands(ctx, offset, lod, ms); const ImageOperands operands(lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
} }

View file

@ -537,8 +537,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
const IR::Value& offset, const IR::Value& offset2); const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref); const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
const IR::Value& offset, Id lod, Id ms); Id lod, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
const IR::Value& skip_mips); const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);

View file

@ -35,7 +35,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
const s64 min_spacing_critical = device_local_memory - 512_MiB; const s64 min_spacing_critical = device_local_memory - 512_MiB;
const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10; const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10; const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
minimum_memory = static_cast<u64>( minimum_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY)); DEFAULT_EXPECTED_MEMORY));

View file

@ -3,6 +3,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "common/settings.h"
#include "video_core/surface.h" #include "video_core/surface.h"
namespace VideoCore::Surface { namespace VideoCore::Surface {
@ -400,11 +401,20 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
} }
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) { u64 TranscodedAstcSize(u64 base_size, PixelFormat format) {
constexpr u64 RGBA8_PIXEL_SIZE = 4; constexpr u64 RGBA8_PIXEL_SIZE = 4;
const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) * const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) *
static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE;
return (base_size * base_block_size) / BytesPerBlock(format); const u64 uncompressed_size = (base_size * base_block_size) / BytesPerBlock(format);
switch (Settings::values.astc_recompression.GetValue()) {
case Settings::AstcRecompression::Bc1:
return uncompressed_size / 8;
case Settings::AstcRecompression::Bc3:
return uncompressed_size / 4;
default:
return uncompressed_size;
}
} }
} // namespace VideoCore::Surface } // namespace VideoCore::Surface

View file

@ -517,6 +517,6 @@ size_t PixelComponentSizeBitsInteger(PixelFormat format);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); u64 TranscodedAstcSize(u64 base_size, PixelFormat format);
} // namespace VideoCore::Surface } // namespace VideoCore::Surface

View file

@ -55,7 +55,7 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
const s64 min_spacing_critical = device_local_memory - 512_MiB; const s64 min_spacing_critical = device_local_memory - 512_MiB;
const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10; const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10; const s64 min_vacancy_critical = (2 * mem_threshold) / 10;
expected_memory = static_cast<u64>( expected_memory = static_cast<u64>(
std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY)); DEFAULT_EXPECTED_MEMORY));
@ -1979,7 +1979,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
if ((IsPixelFormatASTC(image.info.format) && if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) { True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
} }
total_used_memory += Common::AlignUp(tentative_size, 1024); total_used_memory += Common::AlignUp(tentative_size, 1024);
image.lru_index = lru_cache.Insert(image_id, frame_tick); image.lru_index = lru_cache.Insert(image_id, frame_tick);
@ -2149,7 +2149,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if ((IsPixelFormatASTC(image.info.format) && if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) { True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
} }
total_used_memory -= Common::AlignUp(tentative_size, 1024); total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr; const GPUVAddr gpu_addr = image.gpu_addr;

View file

@ -1290,6 +1290,10 @@ u64 Device::GetDeviceMemoryUsage() const {
} }
void Device::CollectPhysicalMemoryInfo() { void Device::CollectPhysicalMemoryInfo() {
// Account for resolution scaling in memory limits
const size_t normal_memory = 6_GiB;
const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
// Calculate limits using memory budget // Calculate limits using memory budget
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
@ -1320,6 +1324,7 @@ void Device::CollectPhysicalMemoryInfo() {
if (!is_integrated) { if (!is_integrated) {
const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB); const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
device_access_memory -= reserve_memory; device_access_memory -= reserve_memory;
device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
return; return;
} }
const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);