From 925a78d02c875154e1baf2e742d471c81da1bdc1 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 12 Oct 2021 22:47:55 +0200 Subject: [PATCH] early-access version 2126 --- README.md | 2 +- externals/dynarmic/CMakeLists.txt | 28 +++++- externals/dynarmic/externals/CMakeLists.txt | 26 ++++-- .../dynarmic/src/dynarmic/CMakeLists.txt | 4 + .../dynarmic/backend/x64/a32_interface.cpp | 6 +- .../dynarmic/backend/x64/a64_interface.cpp | 28 +++++- .../dynarmic/backend/x64/emit_x64_vector.cpp | 92 +++++++++++++++++++ .../backend/x64/exception_handler_posix.cpp | 2 +- .../src/dynarmic/common/fp/unpacked.h | 2 +- .../src/dynarmic/common/x64_disassemble.cpp | 25 ++++- .../src/dynarmic/common/x64_disassemble.h | 9 +- .../A64/translate/impl/simd_across_lanes.cpp | 19 +--- .../dynarmic/src/dynarmic/interface/A32/a32.h | 7 ++ .../dynarmic/src/dynarmic/interface/A64/a64.h | 7 ++ .../dynarmic/src/dynarmic/ir/ir_emitter.cpp | 15 +++ .../dynarmic/src/dynarmic/ir/ir_emitter.h | 1 + .../dynarmic/src/dynarmic/ir/opcodes.inc | 4 + externals/dynarmic/tests/A64/a64.cpp | 78 ++++++++++++++++ externals/dynarmic/tests/cpu_info.cpp | 2 - src/yuzu/configuration/configure_tas.ui | 8 -- 20 files changed, 316 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index cfa107e4f..4204fccd2 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2125. +This is the source code for early-access 2126. ## Legal Notice diff --git a/externals/dynarmic/CMakeLists.txt b/externals/dynarmic/CMakeLists.txt index 2b68b0b75..ff98e034e 100755 --- a/externals/dynarmic/CMakeLists.txt +++ b/externals/dynarmic/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.8) -project(dynarmic C CXX ASM) +project(dynarmic LANGUAGES C CXX ASM VERSION 5) # Determine if we're built as a subproject (using add_subdirectory) # or if this is the master project. @@ -122,6 +122,27 @@ if (NOT TARGET boost) target_include_directories(boost SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) endif() +if (DYNARMIC_NO_BUNDLED_FMT AND NOT TARGET fmt AND NOT TARGET fmt::fmt) + find_package(fmt REQUIRED) + add_library(fmt ALIAS fmt::fmt) +endif() + +if (DYNARMIC_NO_BUNDLED_ROBIN_MAP AND NOT TARGET tsl::robin_map) + find_package(tsl-robin-map REQUIRED) +endif() + +if (DYNARMIC_NO_BUNDLED_XBYAK AND NOT TARGET xbyak) + if (ARCHITECTURE STREQUAL "x86" OR ARCHITECTURE STREQUAL "x86_64") + find_package(xbyak REQUIRED) + add_library(xbyak ALIAS xbyak::xbyak) + endif() +endif() + +if (DYNARMIC_NO_BUNDLED_ZYDIS AND NOT TARGET Zydis) + find_package(Zydis REQUIRED) + add_library(Zydis ALIAS Zydis::Zydis) +endif() + # Enable unit-testing. enable_testing(true) @@ -137,6 +158,11 @@ if (DYNARMIC_TESTS_USE_UNICORN) find_package(Unicorn REQUIRED) endif() +if (DYNARMIC_TESTS AND DYNARMIC_NO_BUNDLED_CATCH AND NOT TARGET catch) + find_package(Catch2 REQUIRED) + add_library(catch ALIAS Catch2::Catch2) +endif() + # Pull in externals CMakeLists for libs where available add_subdirectory(externals) diff --git a/externals/dynarmic/externals/CMakeLists.txt b/externals/dynarmic/externals/CMakeLists.txt index 5326e2e60..21ace6fb3 100755 --- a/externals/dynarmic/externals/CMakeLists.txt +++ b/externals/dynarmic/externals/CMakeLists.txt @@ -4,12 +4,14 @@ # catch -add_library(catch INTERFACE) -target_include_directories(catch INTERFACE $) +if (DYNARMIC_TESTS AND NOT TARGET catch) + add_library(catch INTERFACE) + target_include_directories(catch INTERFACE $) +endif() # fmt -if (NOT DYNARMIC_NO_BUNDLED_FMT) +if (NOT TARGET fmt AND NOT TARGET fmt::fmt) # fmtlib formatting library add_subdirectory(fmt) endif() @@ -21,9 +23,11 @@ target_include_directories(mp INTERFACE $") +if (NOT TARGET tsl::robin_map) + add_library(robin_map INTERFACE) + add_library(tsl::robin_map ALIAS robin_map) + target_include_directories(robin_map SYSTEM INTERFACE "$") +endif() # vixl @@ -45,7 +49,9 @@ endif() # zydis -option(ZYDIS_BUILD_TOOLS "" OFF) -option(ZYDIS_BUILD_EXAMPLES "" OFF) -set(ZYDIS_ZYCORE_PATH "${CMAKE_CURRENT_LIST_DIR}/zycore" CACHE PATH "") -add_subdirectory(zydis EXCLUDE_FROM_ALL) +if (NOT TARGET Zydis) + option(ZYDIS_BUILD_TOOLS "" OFF) + option(ZYDIS_BUILD_EXAMPLES "" OFF) + set(ZYDIS_ZYCORE_PATH "${CMAKE_CURRENT_LIST_DIR}/zycore" CACHE PATH "") + add_subdirectory(zydis EXCLUDE_FROM_ALL) +endif() diff --git a/externals/dynarmic/src/dynarmic/CMakeLists.txt b/externals/dynarmic/src/dynarmic/CMakeLists.txt index fb73efbaa..902ac6145 100755 --- a/externals/dynarmic/src/dynarmic/CMakeLists.txt +++ b/externals/dynarmic/src/dynarmic/CMakeLists.txt @@ -364,6 +364,10 @@ include(CreateDirectoryGroups) create_target_directory_groups(dynarmic) target_include_directories(dynarmic PUBLIC ..) +set_target_properties(dynarmic PROPERTIES + VERSION ${dynarmic_VERSION} + SOVERSION ${dynarmic_VERSION_MAJOR} +) target_compile_options(dynarmic PRIVATE ${DYNARMIC_CXX_FLAGS}) target_link_libraries(dynarmic PUBLIC diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index 69dfce084..24b48c038 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -318,8 +318,12 @@ void Jit::LoadContext(const Context& ctx) { } void Jit::DumpDisassembly() const { - const size_t size = (const char*)impl->block_of_code.getCurr() - (const char*)impl->block_of_code.GetCodeBegin(); + const size_t size = reinterpret_cast(impl->block_of_code.getCurr()) - reinterpret_cast(impl->block_of_code.GetCodeBegin()); Common::DumpDisassembledX64(impl->block_of_code.GetCodeBegin(), size); } +std::vector Jit::Disassemble() const { + const size_t size = reinterpret_cast(impl->block_of_code.getCurr()) - reinterpret_cast(impl->block_of_code.GetCodeBegin()); + return Common::DisassembleX64(impl->block_of_code.GetCodeBegin(), size); +} } // namespace Dynarmic::A32 diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index 4dc68c2f7..cb4ed12a3 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -57,6 +58,8 @@ public: void Run() { ASSERT(!is_executing); + PerformRequestedCacheInvalidation(); + is_executing = true; SCOPE_EXIT { this->is_executing = false; }; jit_state.halt_requested = false; @@ -80,6 +83,8 @@ public: void Step() { ASSERT(!is_executing); + PerformRequestedCacheInvalidation(); + is_executing = true; SCOPE_EXIT { this->is_executing = false; }; jit_state.halt_requested = true; @@ -90,15 +95,21 @@ public: } void ClearCache() { + std::unique_lock lock{invalidation_mutex}; invalidate_entire_cache = true; - RequestCacheInvalidation(); + if (is_executing) { + jit_state.halt_requested = true; + } } void InvalidateCacheRange(u64 start_address, size_t length) { + std::unique_lock lock{invalidation_mutex}; const auto end_address = static_cast(start_address + length - 1); const auto range = boost::icl::discrete_interval::closed(start_address, end_address); invalid_cache_ranges.add(range); - RequestCacheInvalidation(); + if (is_executing) { + jit_state.halt_requested = true; + } } void Reset() { @@ -200,10 +211,15 @@ public: } void DumpDisassembly() const { - const size_t size = (const char*)block_of_code.getCurr() - (const char*)block_of_code.GetCodeBegin(); + const size_t size = reinterpret_cast(block_of_code.getCurr()) - reinterpret_cast(block_of_code.GetCodeBegin()); Common::DumpDisassembledX64(block_of_code.GetCodeBegin(), size); } + std::vector Disassemble() const { + const size_t size = reinterpret_cast(block_of_code.getCurr()) - reinterpret_cast(block_of_code.GetCodeBegin()); + return Common::DisassembleX64(block_of_code.GetCodeBegin(), size); + } + private: static CodePtr GetCurrentBlockThunk(void* thisptr) { Jit::Impl* this_ = static_cast(thisptr); @@ -263,6 +279,7 @@ private: } void PerformRequestedCacheInvalidation() { + std::unique_lock lock{invalidation_mutex}; if (!invalidate_entire_cache && invalid_cache_ranges.empty()) { return; } @@ -287,6 +304,7 @@ private: bool invalidate_entire_cache = false; boost::icl::interval_set invalid_cache_ranges; + std::mutex invalidation_mutex; }; Jit::Jit(UserConfig conf) @@ -402,4 +420,8 @@ void Jit::DumpDisassembly() const { return impl->DumpDisassembly(); } +std::vector Jit::Disassemble() const { + return impl->Disassemble(); +} + } // namespace Dynarmic::A64 diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 79d360138..5a892d282 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -2990,6 +2990,98 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, data); } +void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm temp = xmm0; + + // Add upper elements to lower elements + code.pshufd(temp, data, 0b01'00'11'10); + code.paddb(data, temp); + + // Add adjacent 8-bit values into 64-bit lanes + code.pxor(temp, temp); + code.psadbw(data, temp); + + // Zero-extend lower 8-bits + code.pslldq(data, 15); + code.psrldq(data, 15); + + ctx.reg_alloc.DefineValue(inst, data); +} + +void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm temp = xmm0; + + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pxor(temp, temp); + code.phaddw(data, xmm0); + code.phaddw(data, xmm0); + code.phaddw(data, xmm0); + } else { + // Add upper elements to lower elements + code.pshufd(temp, data, 0b00'01'10'11); + code.paddw(data, temp); + + // Add pairs of 16-bit values into 32-bit lanes + code.movdqa(temp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); + code.pmaddwd(data, temp); + + // Sum adjacent 32-bit lanes + code.pshufd(temp, data, 0b10'11'00'01); + code.paddd(data, temp); + // Zero-extend lower 16-bits + code.pslldq(data, 14); + code.psrldq(data, 14); + } + + ctx.reg_alloc.DefineValue(inst, data); +} + +void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm temp = xmm0; + + // Add upper elements to lower elements(reversed) + code.pshufd(temp, data, 0b00'01'10'11); + code.paddd(data, temp); + + // Sum adjacent 32-bit lanes + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.phaddd(data, data); + } else { + code.pshufd(temp, data, 0b10'11'00'01); + code.paddd(data, temp); + } + + // shift upper-most result into lower-most lane + code.psrldq(data, 12); + + ctx.reg_alloc.DefineValue(inst, data); +} + +void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm temp = xmm0; + + // Add upper elements to lower elements + code.pshufd(temp, data, 0b01'00'11'10); + code.paddq(data, temp); + + // Zero-extend lower 64-bits + code.movq(data, data); + + ctx.reg_alloc.DefineValue(inst, data); +} + static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp index ff01ab606..bdaac0c3e 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp @@ -64,7 +64,7 @@ private: SigHandler sig_handler; SigHandler::SigHandler() { - constexpr size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); + const size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); signal_stack_memory = std::malloc(signal_stack_size); diff --git a/externals/dynarmic/src/dynarmic/common/fp/unpacked.h b/externals/dynarmic/src/dynarmic/common/fp/unpacked.h index e77c87a53..5b2528050 100755 --- a/externals/dynarmic/src/dynarmic/common/fp/unpacked.h +++ b/externals/dynarmic/src/dynarmic/common/fp/unpacked.h @@ -46,7 +46,7 @@ constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) { const int highest_bit = Common::HighestSetBit(value); const int offset = static_cast(normalized_point_position) - highest_bit; value <<= offset; - exponent -= offset - normalized_point_position; + exponent -= offset - static_cast(normalized_point_position); return {sign, exponent, value}; } diff --git a/externals/dynarmic/src/dynarmic/common/x64_disassemble.cpp b/externals/dynarmic/src/dynarmic/common/x64_disassemble.cpp index 0cfb204d2..ce32028d0 100755 --- a/externals/dynarmic/src/dynarmic/common/x64_disassemble.cpp +++ b/externals/dynarmic/src/dynarmic/common/x64_disassemble.cpp @@ -21,15 +21,36 @@ void DumpDisassembledX64(const void* ptr, size_t size) { size_t offset = 0; ZydisDecodedInstruction instruction; - while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, (const char*)ptr + offset, size - offset, &instruction))) { + while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, static_cast(ptr) + offset, size - offset, &instruction))) { fmt::print("{:016x} ", (u64)ptr + offset); char buffer[256]; - ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), (u64)ptr + offset); + ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), reinterpret_cast(ptr) + offset); puts(buffer); offset += instruction.length; } } +std::vector DisassembleX64(const void* ptr, size_t size) { + std::vector result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + + size_t offset = 0; + ZydisDecodedInstruction instruction; + while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, static_cast(ptr) + offset, size - offset, &instruction))) { + char buffer[256]; + ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), reinterpret_cast(ptr) + offset); + + result.push_back(fmt::format("{:016x} {}", (u64)ptr + offset, buffer)); + + offset += instruction.length; + } + + return result; +} } // namespace Dynarmic::Common diff --git a/externals/dynarmic/src/dynarmic/common/x64_disassemble.h b/externals/dynarmic/src/dynarmic/common/x64_disassemble.h index 4b1e66604..ec7a33787 100755 --- a/externals/dynarmic/src/dynarmic/common/x64_disassemble.h +++ b/externals/dynarmic/src/dynarmic/common/x64_disassemble.h @@ -5,10 +5,17 @@ #pragma once +#include +#include + #include "dynarmic/common/common_types.h" namespace Dynarmic::Common { void DumpDisassembledX64(const void* ptr, size_t size); - +/** + * Disassemble `size' bytes from `ptr' and return the disassembled lines as a vector + * of strings. + */ +std::vector DisassembleX64(const void* ptr, size_t size); } // namespace Dynarmic::Common diff --git a/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_across_lanes.cpp b/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_across_lanes.cpp index 47aa5871c..6147a54a0 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_across_lanes.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_across_lanes.cpp @@ -171,27 +171,10 @@ bool TranslatorVisitor::ADDV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { const size_t esize = 8 << size.ZeroExtend(); const size_t datasize = Q ? 128 : 64; - const size_t elements = datasize / esize; const IR::U128 operand = V(datasize, Vn); - const auto get_element = [&](IR::U128 vec, size_t element) { - return ir.ZeroExtendToWord(ir.VectorGetElement(esize, vec, element)); - }; - - IR::U32 sum = get_element(operand, 0); - for (size_t i = 1; i < elements; i++) { - sum = ir.Add(sum, get_element(operand, i)); - } - - if (size == 0b00) { - V(datasize, Vd, ir.ZeroExtendToQuad(ir.LeastSignificantByte(sum))); - } else if (size == 0b01) { - V(datasize, Vd, ir.ZeroExtendToQuad(ir.LeastSignificantHalf(sum))); - } else { - V(datasize, Vd, ir.ZeroExtendToQuad(sum)); - } - + V(128, Vd, ir.VectorReduceAdd(esize, operand)); return true; } diff --git a/externals/dynarmic/src/dynarmic/interface/A32/a32.h b/externals/dynarmic/src/dynarmic/interface/A32/a32.h index 99a823e50..c36f9fcbf 100755 --- a/externals/dynarmic/src/dynarmic/interface/A32/a32.h +++ b/externals/dynarmic/src/dynarmic/interface/A32/a32.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "dynarmic/interface/A32/config.h" @@ -91,6 +92,12 @@ public: /// Debugging: Dump a disassembly all compiled code to the console. void DumpDisassembly() const; + /** + * Disassemble the instructions following the current pc and return + * the resulting instructions as a vector of their string representations. + */ + std::vector Disassemble() const; + private: bool is_executing = false; diff --git a/externals/dynarmic/src/dynarmic/interface/A64/a64.h b/externals/dynarmic/src/dynarmic/interface/A64/a64.h index 384ad239d..d908fc21f 100755 --- a/externals/dynarmic/src/dynarmic/interface/A64/a64.h +++ b/externals/dynarmic/src/dynarmic/interface/A64/a64.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "dynarmic/interface/A64/config.h" @@ -117,6 +118,12 @@ public: /// Debugging: Dump a disassembly all of compiled code to the console. void DumpDisassembly() const; + /* + * Disassemble the instructions following the current pc and return + * the resulting instructions as a vector of their string representations. + */ + std::vector Disassemble() const; + private: struct Impl; std::unique_ptr impl; diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp index 717d4156b..7156c247d 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp @@ -1526,6 +1526,21 @@ U128 IREmitter::VectorReverseBits(const U128& a) { return Inst(Opcode::VectorReverseBits, a); } +U128 IREmitter::VectorReduceAdd(size_t esize, const U128& a) { + switch (esize) { + case 8: + return Inst(Opcode::VectorReduceAdd8, a); + case 16: + return Inst(Opcode::VectorReduceAdd16, a); + case 32: + return Inst(Opcode::VectorReduceAdd32, a); + case 64: + return Inst(Opcode::VectorReduceAdd64, a); + } + + UNREACHABLE(); +} + U128 IREmitter::VectorRotateLeft(size_t esize, const U128& a, u8 amount) { ASSERT(amount < esize); diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h index d1fc8a7d9..7336af58b 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h @@ -294,6 +294,7 @@ public: U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b); U128 VectorPopulationCount(const U128& a); U128 VectorReverseBits(const U128& a); + U128 VectorReduceAdd(size_t esize, const U128& a); U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount); U128 VectorRotateRight(size_t esize, const U128& a, u8 amount); U128 VectorRoundingHalvingAddSigned(size_t esize, const U128& a, const U128& b); diff --git a/externals/dynarmic/src/dynarmic/ir/opcodes.inc b/externals/dynarmic/src/dynarmic/ir/opcodes.inc index 41bd99b84..0c27dbe6c 100755 --- a/externals/dynarmic/src/dynarmic/ir/opcodes.inc +++ b/externals/dynarmic/src/dynarmic/ir/opcodes.inc @@ -431,6 +431,10 @@ OPCODE(VectorPolynomialMultiplyLong8, U128, U128 OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 ) OPCODE(VectorPopulationCount, U128, U128 ) OPCODE(VectorReverseBits, U128, U128 ) +OPCODE(VectorReduceAdd8, U128, U128 ) +OPCODE(VectorReduceAdd16, U128, U128 ) +OPCODE(VectorReduceAdd32, U128, U128 ) +OPCODE(VectorReduceAdd64, U128, U128 ) OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 ) OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 ) OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 ) diff --git a/externals/dynarmic/tests/A64/a64.cpp b/externals/dynarmic/tests/A64/a64.cpp index 3f36524af..2f2e2f622 100755 --- a/externals/dynarmic/tests/A64/a64.cpp +++ b/externals/dynarmic/tests/A64/a64.cpp @@ -32,6 +32,84 @@ TEST_CASE("A64: ADD", "[a64]") { REQUIRE(jit.GetPC() == 4); } +TEST_CASE("A64: ADD{V,P}", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x0E31B801); // ADDV b1, v0.8b + env.code_mem.emplace_back(0x4E31B802); // ADDV b2, v0.16b + env.code_mem.emplace_back(0x0E71B803); // ADDV h3, v0.4h + env.code_mem.emplace_back(0x4E71B804); // ADDV h4, v0.8h + env.code_mem.emplace_back(0x0EA0BC05); // ADDP v5.2s, v0.2s, v0.2s + env.code_mem.emplace_back(0x4EB1B806); // ADDV s6, v0.4s + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetVector(0, {0x0101010101010101, 0x0101010101010101}); + jit.SetPC(0); + + env.ticks_left = 7; + jit.Run(); + + REQUIRE(jit.GetVector(1) == Vector{0x0000000000000008, 0x0000000000000000}); + REQUIRE(jit.GetVector(2) == Vector{0x0000000000000010, 0x0000000000000000}); + REQUIRE(jit.GetVector(3) == Vector{0x0000000000000404, 0x0000000000000000}); + REQUIRE(jit.GetVector(4) == Vector{0x0000000000000808, 0x0000000000000000}); + REQUIRE(jit.GetVector(5) == Vector{0x0202020202020202, 0x0000000000000000}); + REQUIRE(jit.GetVector(6) == Vector{0x0000000004040404, 0x0000000000000000}); +} + +TEST_CASE("A64: UADDL{V,P}", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x2E303801); // UADDLV h1, v0.8b + env.code_mem.emplace_back(0x6E303802); // UADDLV h2, v0.16b + env.code_mem.emplace_back(0x2E703803); // UADDLV s3, v0.4h + env.code_mem.emplace_back(0x6E703804); // UADDLV s4, v0.8h + env.code_mem.emplace_back(0x2EA02805); // UADDLP v5.1d, v0.2s + env.code_mem.emplace_back(0x6EB03806); // UADDLV d6, v0.4s + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetVector(0, {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}); + jit.SetPC(0); + + env.ticks_left = 7; + jit.Run(); + + REQUIRE(jit.GetVector(1) == Vector{0x00000000000007f8, 0x0000000000000000}); + REQUIRE(jit.GetVector(2) == Vector{0x0000000000000ff0, 0x0000000000000000}); + REQUIRE(jit.GetVector(3) == Vector{0x000000000003fffc, 0x0000000000000000}); + REQUIRE(jit.GetVector(4) == Vector{0x000000000007fff8, 0x0000000000000000}); + REQUIRE(jit.GetVector(5) == Vector{0x00000001fffffffe, 0x0000000000000000}); + REQUIRE(jit.GetVector(6) == Vector{0x00000003fffffffc, 0x0000000000000000}); +} + +TEST_CASE("A64: SADDL{V,P}", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x0E303801); // SADDLV h1, v0.8b + env.code_mem.emplace_back(0x4E303802); // SADDLV h2, v0.16b + env.code_mem.emplace_back(0x0E703803); // SADDLV s3, v0.4h + env.code_mem.emplace_back(0x4E703804); // SADDLV s4, v0.8h + env.code_mem.emplace_back(0x0EA02805); // SADDLP v5.1d, v0.2s + env.code_mem.emplace_back(0x4EB03806); // SADDLV d6, v0.4s + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetVector(0, {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}); + jit.SetPC(0); + + env.ticks_left = 7; + jit.Run(); + + REQUIRE(jit.GetVector(1) == Vector{0x000000000000fff8, 0x0000000000000000}); + REQUIRE(jit.GetVector(2) == Vector{0x000000000000fff0, 0x0000000000000000}); + REQUIRE(jit.GetVector(3) == Vector{0x00000000fffffffc, 0x0000000000000000}); + REQUIRE(jit.GetVector(4) == Vector{0x00000000fffffff8, 0x0000000000000000}); + REQUIRE(jit.GetVector(5) == Vector{0xfffffffffffffffe, 0x0000000000000000}); + REQUIRE(jit.GetVector(6) == Vector{0xfffffffffffffffc, 0x0000000000000000}); +} + TEST_CASE("A64: VQADD", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; diff --git a/externals/dynarmic/tests/cpu_info.cpp b/externals/dynarmic/tests/cpu_info.cpp index 8a1ca4647..b65de18e5 100755 --- a/externals/dynarmic/tests/cpu_info.cpp +++ b/externals/dynarmic/tests/cpu_info.cpp @@ -73,8 +73,6 @@ TEST_CASE("Host CPU supports", "[a64]") { std::make_pair(Xbyak::util::Cpu::tSSE3, "SSE3"), std::make_pair(Xbyak::util::Cpu::tSSE41, "SSE41"), std::make_pair(Xbyak::util::Cpu::tSSE42, "SSE42"), - std::make_pair(Xbyak::util::Cpu::tSSE4a, "SSE4a"), - std::make_pair(Xbyak::util::Cpu::tSSE5, "SSE5"), std::make_pair(Xbyak::util::Cpu::tSSSE3, "SSSE3"), std::make_pair(Xbyak::util::Cpu::tVAES, "VAES"), std::make_pair(Xbyak::util::Cpu::tVPCLMULQDQ, "VPCLMULQDQ"), diff --git a/src/yuzu/configuration/configure_tas.ui b/src/yuzu/configuration/configure_tas.ui index 3972f9083..6caa19031 100755 --- a/src/yuzu/configuration/configure_tas.ui +++ b/src/yuzu/configuration/configure_tas.ui @@ -2,14 +2,6 @@ ConfigureTas - - - 0 - 0 - 337 - 316 - -