From 491be807d7b1097e89e536510be405bf8f1e12df Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sun, 11 Feb 2024 00:24:12 +0100 Subject: [PATCH] early-access version 4130 --- README.md | 2 +- .../hle/service/nvdrv/devices/nvhost_gpu.cpp | 7 + src/video_core/control/channel_state.h | 6 + src/video_core/control/scheduler.cpp | 194 +++++++++++++++++- src/video_core/control/scheduler.h | 22 +- src/video_core/engines/puller.cpp | 12 +- src/video_core/gpu.cpp | 8 + src/video_core/gpu.h | 9 +- src/video_core/gpu_thread.cpp | 14 +- src/video_core/gpu_thread.h | 9 +- src/video_core/texture_cache/image_info.cpp | 1 - src/video_core/texture_cache/image_info.h | 1 - src/video_core/texture_cache/texture_cache.h | 15 +- 13 files changed, 258 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index b647f6931..0734f6955 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 4129. +This is the source code for early-access 4130. ## Legal Notice diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index e71c58aa9..f537fb203 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -13,6 +13,7 @@ #include "core/hle/service/nvdrv/nvdrv.h" #include "core/memory.h" #include "video_core/control/channel_state.h" +#include "video_core/control/scheduler.h" #include "video_core/engines/puller.h" #include "video_core/gpu.h" #include "video_core/host1x/host1x.h" @@ -33,6 +34,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_, syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()}, channel_state{system.GPU().AllocateChannel()} { channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false); + channel_state->syncpoint_id = channel_syncpoint; sm_exception_breakpoint_int_report_event = events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt"); sm_exception_breakpoint_pause_report_event = @@ -157,6 +159,9 @@ NvResult nvhost_gpu::SetErrorNotifier(IoctlSetErrorNotifier& params) { NvResult nvhost_gpu::SetChannelPriority(IoctlChannelSetPriority& params) { channel_priority = params.priority; + if (channel_state->initialized) { + system.GPU().Scheduler().ChangePriority(channel_state->bind_id, channel_priority); + } LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); return NvResult::Success; } @@ -314,6 +319,7 @@ NvResult nvhost_gpu::GetWaitbase(IoctlGetWaitbase& params) { NvResult nvhost_gpu::ChannelSetTimeout(IoctlChannelSetTimeout& params) { LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); + channel_state->timeout = params.timeout; return NvResult::Success; } @@ -321,6 +327,7 @@ NvResult nvhost_gpu::ChannelSetTimeslice(IoctlSetTimeslice& params) { LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); channel_timeslice = params.timeslice; + channel_state->timeslice = params.timeslice; return NvResult::Success; } diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index ceaa92647..9f9b4ff75 100755 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -45,6 +45,12 @@ struct ChannelState { void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); s32 bind_id = -1; + /// Scheduling info + u32 syncpoint_id = 0xFFFF; + u32 priority = 0; + u32 timeslice = 0; + u32 timeout = 0; + /// 3D engine std::unique_ptr maxwell_3d; /// 2D engine diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp index 31e3b1235..91d81500a 100755 --- a/src/video_core/control/scheduler.cpp +++ b/src/video_core/control/scheduler.cpp @@ -1,32 +1,204 @@ // SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later +#include +#include +#include #include +#include +#include #include "common/assert.h" -#include "video_core/control/channel_state.h" +#include "common/fiber.h" #include "video_core/control/scheduler.h" +#include "video_core/dma_pusher.h" #include "video_core/gpu.h" namespace Tegra::Control { -Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {} + +struct GPFifoContext { + bool is_active; + bool is_running; + std::shared_ptr context; + std::deque pending_work; + std::mutex guard; + s32 bind_id; + std::shared_ptr info; + size_t yield_count; + size_t scheduled_count; +}; + +struct Scheduler::SchedulerImpl { + // Fifos + std::map, std::greater> schedule_priority_queue; + std::unordered_map channel_gpfifo_ids; + std::deque gpfifos; + std::deque free_fifos; + + // Scheduling + std::mutex scheduling_guard; + std::shared_ptr master_control; + bool must_reschedule{}; + GPFifoContext* current_fifo{}; +}; + +Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} { + impl = std::make_unique(); +} Scheduler::~Scheduler() = default; +void Scheduler::Init() { + impl->master_control = Common::Fiber::ThreadToFiber(); +} + +void Scheduler::Resume() { + bool pending_work; + do { + pending_work = false; + { + std::unique_lock lk(impl->scheduling_guard); + impl->current_fifo = nullptr; + auto it = impl->schedule_priority_queue.begin(); + while (it != impl->schedule_priority_queue.end()) { + pending_work = ScheduleLevel(it->second); + if (pending_work) { + break; + } + it = std::next(it); + } + if (pending_work) { + impl->must_reschedule = false; + } + } + if (impl->current_fifo) { + impl->current_fifo->scheduled_count++; + Common::Fiber::YieldTo(impl->master_control, *impl->current_fifo->context); + } + } while (pending_work); +} + +bool Scheduler::ScheduleLevel(std::list& queue) { + bool found_anything = false; + size_t min_schedule_count = std::numeric_limits::max(); + for (auto id : queue) { + auto& fifo = impl->gpfifos[id]; + std::scoped_lock lk2(fifo.guard); + if (!fifo.pending_work.empty() || fifo.is_running) { + if (fifo.scheduled_count > min_schedule_count) { + continue; + } + if (fifo.scheduled_count < fifo.yield_count) { + fifo.scheduled_count++; + continue; + } + min_schedule_count = fifo.scheduled_count; + impl->current_fifo = &fifo; + found_anything = true; + } + } + return found_anything; +} + +void Scheduler::ChangePriority(s32 channel_id, u32 new_priority) { + std::unique_lock lk(impl->scheduling_guard); + auto fifo_it = impl->channel_gpfifo_ids.find(channel_id); + if (fifo_it == impl->channel_gpfifo_ids.end()) { + return; + } + const size_t fifo_id = fifo_it->second; + auto& fifo = impl->gpfifos[fifo_id]; + const auto old_priority = fifo.info->priority; + fifo.info->priority = new_priority; + impl->schedule_priority_queue.try_emplace(new_priority); + impl->schedule_priority_queue[new_priority].push_back(fifo_id); + impl->schedule_priority_queue[old_priority].remove_if( + [fifo_id](size_t id) { return id == fifo_id; }); +} + +void Scheduler::Yield() { + ASSERT(impl->current_fifo != nullptr); + impl->current_fifo->yield_count = impl->current_fifo->scheduled_count + 1; + Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control); + gpu.BindChannel(impl->current_fifo->bind_id); +} + +void Scheduler::CheckStatus() { + { + std::unique_lock lk(impl->scheduling_guard); + if (!impl->must_reschedule) { + return; + } + } + Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control); + gpu.BindChannel(impl->current_fifo->bind_id); +} + void Scheduler::Push(s32 channel, CommandList&& entries) { - std::unique_lock lk(scheduling_guard); - auto it = channels.find(channel); - ASSERT(it != channels.end()); - auto channel_state = it->second; - gpu.BindChannel(channel_state->bind_id); - channel_state->dma_pusher->Push(std::move(entries)); - channel_state->dma_pusher->DispatchCalls(); + std::unique_lock lk(impl->scheduling_guard); + auto it = impl->channel_gpfifo_ids.find(channel); + ASSERT(it != impl->channel_gpfifo_ids.end()); + auto gpfifo_id = it->second; + auto& fifo = impl->gpfifos[gpfifo_id]; + { + std::scoped_lock lk2(fifo.guard); + fifo.pending_work.emplace_back(std::move(entries)); + } + if (impl->current_fifo != nullptr && impl->current_fifo->info->priority < fifo.info->priority) { + impl->must_reschedule = true; + } +} + +void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) { + gpu.BindChannel(channel_id); + auto& fifo = impl->gpfifos[gpfifo_id]; + while (true) { + auto* channel_state = fifo.info.get(); + fifo.guard.lock(); + while (!fifo.pending_work.empty()) { + fifo.is_running = true; + { + CommandList&& entries = std::move(fifo.pending_work.front()); + channel_state->dma_pusher->Push(std::move(entries)); + fifo.pending_work.pop_front(); + } + fifo.guard.unlock(); + channel_state->dma_pusher->DispatchCalls(); + CheckStatus(); + fifo.guard.lock(); + } + fifo.is_running = false; + fifo.guard.unlock(); + Common::Fiber::YieldTo(fifo.context, *impl->master_control); + gpu.BindChannel(channel_id); + } } void Scheduler::DeclareChannel(std::shared_ptr new_channel) { s32 channel = new_channel->bind_id; - std::unique_lock lk(scheduling_guard); - channels.emplace(channel, new_channel); + std::unique_lock lk(impl->scheduling_guard); + + size_t new_fifo_id; + if (!impl->free_fifos.empty()) { + new_fifo_id = impl->free_fifos.front(); + impl->free_fifos.pop_front(); + } else { + new_fifo_id = impl->gpfifos.size(); + impl->gpfifos.emplace_back(); + } + auto& new_fifo = impl->gpfifos[new_fifo_id]; + impl->channel_gpfifo_ids[channel] = new_fifo_id; + new_fifo.is_active = true; + new_fifo.bind_id = channel; + new_fifo.pending_work.clear(); + new_fifo.info = new_channel; + new_fifo.scheduled_count = 0; + new_fifo.yield_count = 0; + new_fifo.is_running = false; + impl->schedule_priority_queue.try_emplace(new_channel->priority); + impl->schedule_priority_queue[new_channel->priority].push_back(new_fifo_id); + std::function callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel); + new_fifo.context = std::make_shared(std::move(callback)); } } // namespace Tegra::Control diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h index eab52e536..faa888dde 100755 --- a/src/video_core/control/scheduler.h +++ b/src/video_core/control/scheduler.h @@ -3,10 +3,11 @@ #pragma once +#include #include -#include -#include +#include "common/common_types.h" +#include "video_core/control/channel_state.h" #include "video_core/dma_pusher.h" namespace Tegra { @@ -22,13 +23,26 @@ public: explicit Scheduler(GPU& gpu_); ~Scheduler(); + void Init(); + + void Resume(); + + void Yield(); + void Push(s32 channel, CommandList&& entries); void DeclareChannel(std::shared_ptr new_channel); + void ChangePriority(s32 channel_id, u32 new_priority); + private: - std::unordered_map> channels; - std::mutex scheduling_guard; + void ChannelLoop(size_t gpfifo_id, s32 channel_id); + bool ScheduleLevel(std::list& queue); + void CheckStatus(); + + struct SchedulerImpl; + std::unique_ptr impl; + GPU& gpu; }; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 79d84d662..c39cada43 100755 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -6,6 +6,7 @@ #include "common/settings.h" #include "core/core.h" #include "video_core/control/channel_state.h" +#include "video_core/control/scheduler.h" #include "video_core/dma_pusher.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -14,6 +15,8 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/engines/puller.h" #include "video_core/gpu.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -60,11 +63,14 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { } void Puller::ProcessFenceActionMethod() { + auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager(); switch (regs.fence_action.op) { case Puller::FenceOperation::Acquire: - // UNIMPLEMENTED_MSG("Channel Scheduling pending."); - // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); - rasterizer->ReleaseFences(); + while (regs.fence_value > + syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) { + rasterizer->ReleaseFences(); + gpu.Scheduler().Yield(); + } break; case Puller::FenceOperation::Increment: rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 59356015b..f45f797b3 100755 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -387,6 +387,14 @@ std::shared_ptr GPU::AllocateChannel() { return impl->AllocateChannel(); } +Tegra::Control::Scheduler& GPU::Scheduler() { + return *impl->scheduler; +} + +const Tegra::Control::Scheduler& GPU::Scheduler() const { + return *impl->scheduler; +} + void GPU::InitChannel(Control::ChannelState& to_init) { impl->InitChannel(to_init); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 25c75a109..154466f23 100755 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -124,7 +124,8 @@ class KeplerCompute; namespace Control { struct ChannelState; -} +class Scheduler; +} // namespace Control namespace Host1x { class Host1x; @@ -204,6 +205,12 @@ public: /// Returns a const reference to the shader notifier. [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; + /// Returns GPU Channel Scheduler. + [[nodiscard]] Tegra::Control::Scheduler& Scheduler(); + + /// Returns GPU Channel Scheduler. + [[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const; + [[nodiscard]] u64 GetTicks() const; [[nodiscard]] bool IsAsync() const; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 0832234af..3aa59d034 100755 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -34,13 +34,15 @@ static void RunThread(std::stop_token stop_token, Core::System& system, CommandDataContainer next; + scheduler.Init(); + while (!stop_token.stop_requested()) { state.queue.PopWait(next, stop_token); if (stop_token.stop_requested()) { break; } - if (auto* submit_list = std::get_if(&next.data)) { - scheduler.Push(submit_list->channel, std::move(submit_list->entries)); + if (std::holds_alternative(next.data)) { + scheduler.Resume(); } else if (std::holds_alternative(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if(&next.data)) { @@ -67,14 +69,16 @@ ThreadManager::~ThreadManager() = default; void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::Control::Scheduler& scheduler) { + Tegra::Control::Scheduler& scheduler_) { rasterizer = renderer.ReadRasterizer(); + scheduler = &scheduler_; thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), - std::ref(scheduler), std::ref(state)); + std::ref(scheduler_), std::ref(state)); } void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { - PushCommand(SubmitListCommand(channel, std::move(entries))); + scheduler->Push(channel, std::move(entries)); + PushCommand(SubmitListCommand()); } void ThreadManager::FlushRegion(DAddr addr, u64 size) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 1a0de86b5..a4b01c42a 100755 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -36,13 +36,7 @@ class RendererBase; namespace VideoCommon::GPUThread { /// Command to signal to the GPU thread that a command list is ready for processing -struct SubmitListCommand final { - explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_) - : channel{channel_}, entries{std::move(entries_)} {} - - s32 channel; - Tegra::CommandList entries; -}; +struct SubmitListCommand final {}; /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { @@ -124,6 +118,7 @@ public: private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data, bool block = false); + Tegra::Control::Scheduler* scheduler; Core::System& system; const bool is_async; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 4cfaf7ce6..22eb7bd00 100755 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -42,7 +42,6 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { }; } rescaleable = false; - is_sparse = config.is_sparse != 0; tile_width_spacing = config.tile_width_spacing; if (config.texture_type != TextureType::Texture2D && config.texture_type != TextureType::Texture2DNoMipmap) { diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 286457acc..0a50795b3 100755 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -41,7 +41,6 @@ struct ImageInfo { bool downscaleable = false; bool forced_flushed = false; bool dma_downloaded = false; - bool is_sparse = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 47ea0bd96..85046e708 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -600,17 +600,17 @@ void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; - if (False(image.flags & ImageFlagBits::CpuModified)) { - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } + if (True(image.flags & ImageFlagBits::CpuModified)) { + continue; } - + image.flags |= ImageFlagBits::CpuModified; if (True(image.flags & ImageFlagBits::Remapped)) { continue; } image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } } } @@ -1469,8 +1469,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) && - new_info.is_sparse) { + if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; }