diff options
Diffstat (limited to '')
52 files changed, 2371 insertions, 4770 deletions
diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 36082087ded632079b16d24137fdd0c450ce82e +Subproject 459d7d1bafcf85677c989b7cb260d3789aa813e diff --git a/externals/microprofile/microprofileui.h b/externals/microprofile/microprofileui.h index 66a73abc5..09223b33f 100644 --- a/externals/microprofile/microprofileui.h +++ b/externals/microprofile/microprofileui.h @@ -1231,7 +1231,7 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16]; const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " "; -#if defined(WIN32) +#if defined(_WIN32) // nThreadId is 32-bit on Windows int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) ); #else diff --git a/externals/nihstro b/externals/nihstro -Subproject 7e24743af21a7c2e3cef21ef174ae4269d0cfda +Subproject 26a0a04a458df2b9ba6e39608bee183d8a0a00e diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp index 2bbf7146e..92484c526 100644 --- a/src/audio_core/hle/source.cpp +++ b/src/audio_core/hle/source.cpp @@ -158,6 +158,14 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, static_cast<size_t>(state.mono_or_stereo)); } + u32_dsp play_position = {}; + if (config.play_position_dirty && config.play_position != 0) { + config.play_position_dirty.Assign(0); + play_position = config.play_position; + // play_position applies only to the embedded buffer, and defaults to 0 w/o a dirty bit + // This will be the starting sample for the first time the buffer is played. 
+ } + if (config.embedded_buffer_dirty) { config.embedded_buffer_dirty.Assign(0); state.input_queue.emplace(Buffer{ @@ -171,9 +179,18 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, state.mono_or_stereo, state.format, false, + play_position, + false, }); - LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", - config.physical_address, config.length, config.buffer_id); + LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu start=%u", + config.physical_address, config.length, config.buffer_id, + static_cast<u32>(config.play_position)); + } + + if (config.loop_related_dirty && config.loop_related != 0) { + config.loop_related_dirty.Assign(0); + LOG_WARNING(Audio_DSP, "Unhandled complex loop with loop_related=0x%08x", + static_cast<u32>(config.loop_related)); } if (config.buffer_queue_dirty) { @@ -192,6 +209,8 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config, state.mono_or_stereo, state.format, true, + {}, // 0 in u32_dsp + false, }); LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id); @@ -247,18 +266,18 @@ bool Source::DequeueBuffer() { if (state.input_queue.empty()) return false; - const Buffer buf = state.input_queue.top(); - state.input_queue.pop(); + Buffer buf = state.input_queue.top(); + + // if we're in a loop, the current sound keeps playing afterwards, so leave the queue alone + if (!buf.is_looping) { + state.input_queue.pop(); + } if (buf.adpcm_dirty) { state.adpcm_state.yn1 = buf.adpcm_yn[0]; state.adpcm_state.yn2 = buf.adpcm_yn[1]; } - if (buf.is_looping) { - LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment"); - } - const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address); if (memory) { const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 
2 : 1; @@ -305,10 +324,13 @@ bool Source::DequeueBuffer() { break; } - state.current_sample_number = 0; - state.next_sample_number = 0; + // the first playthrough starts at play_position, loops start at the beginning of the buffer + state.current_sample_number = (!buf.has_played) ? buf.play_position : 0; + state.next_sample_number = state.current_sample_number; state.current_buffer_id = buf.buffer_id; - state.buffer_update = buf.from_queue; + state.buffer_update = buf.from_queue && !buf.has_played; + + buf.has_played = true; LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu", source_id, buf.buffer_id, buf.from_queue ? "true" : "false", diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h index 3d725f2a3..ccb7f064f 100644 --- a/src/audio_core/hle/source.h +++ b/src/audio_core/hle/source.h @@ -76,6 +76,8 @@ private: Format format; bool from_queue; + u32_dsp play_position; // = 0; + bool has_played; // = false; }; struct BufferOrder { diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 827c90e55..fac1c9a0e 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -93,6 +93,21 @@ void Config::ReadValues() { Settings::values.region_value = sdl2_config->GetInteger("System", "region_value", Settings::REGION_VALUE_AUTO_SELECT); + // Camera + using namespace Service::CAM; + Settings::values.camera_name[OuterRightCamera] = + sdl2_config->Get("Camera", "camera_outer_right_name", "blank"); + Settings::values.camera_config[OuterRightCamera] = + sdl2_config->Get("Camera", "camera_outer_right_config", ""); + Settings::values.camera_name[InnerCamera] = + sdl2_config->Get("Camera", "camera_inner_name", "blank"); + Settings::values.camera_config[InnerCamera] = + sdl2_config->Get("Camera", "camera_inner_config", ""); + Settings::values.camera_name[OuterLeftCamera] = + sdl2_config->Get("Camera", "camera_outer_left_name", "blank"); + Settings::values.camera_config[OuterLeftCamera] = + sdl2_config->Get("Camera", 
"camera_outer_left_config", ""); + // Miscellaneous Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Info"); diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index d728fb9e8..435ba6f00 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -109,6 +109,22 @@ is_new_3ds = # -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan region_value = +[Camera] +# Which camera engine to use for the right outer camera +# blank (default): a dummy camera that always returns black image +camera_outer_right_name = + +# A config string for the right outer camera. Its meaning is defined by the camera engine +camera_outer_right_config = + +# ... for the left outer camera +camera_outer_left_name = +camera_outer_left_config = + +# ... for the inner camera +camera_inner_name = +camera_inner_config = + [Miscellaneous] # A filter which removes logs below a certain logging level. # Examples: *:Debug Kernel.SVC:Trace Service.*:Critical diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp index f776e16b2..b65f57fdc 100644 --- a/src/citra_qt/config.cpp +++ b/src/citra_qt/config.cpp @@ -67,6 +67,22 @@ void Config::ReadValues() { qt_config->value("output_device", "auto").toString().toStdString(); qt_config->endGroup(); + using namespace Service::CAM; + qt_config->beginGroup("Camera"); + Settings::values.camera_name[OuterRightCamera] = + qt_config->value("camera_outer_right_name", "blank").toString().toStdString(); + Settings::values.camera_config[OuterRightCamera] = + qt_config->value("camera_outer_right_config", "").toString().toStdString(); + Settings::values.camera_name[InnerCamera] = + qt_config->value("camera_inner_name", "blank").toString().toStdString(); + Settings::values.camera_config[InnerCamera] = + qt_config->value("camera_inner_config", "").toString().toStdString(); + Settings::values.camera_name[OuterLeftCamera] = + qt_config->value("camera_outer_left_name", 
"blank").toString().toStdString(); + Settings::values.camera_config[OuterLeftCamera] = + qt_config->value("camera_outer_left_config", "").toString().toStdString(); + qt_config->endGroup(); + qt_config->beginGroup("Data Storage"); Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); qt_config->endGroup(); @@ -174,6 +190,22 @@ void Config::SaveValues() { qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id)); qt_config->endGroup(); + using namespace Service::CAM; + qt_config->beginGroup("Camera"); + qt_config->setValue("camera_outer_right_name", + QString::fromStdString(Settings::values.camera_name[OuterRightCamera])); + qt_config->setValue("camera_outer_right_config", + QString::fromStdString(Settings::values.camera_config[OuterRightCamera])); + qt_config->setValue("camera_inner_name", + QString::fromStdString(Settings::values.camera_name[InnerCamera])); + qt_config->setValue("camera_inner_config", + QString::fromStdString(Settings::values.camera_config[InnerCamera])); + qt_config->setValue("camera_outer_left_name", + QString::fromStdString(Settings::values.camera_name[OuterLeftCamera])); + qt_config->setValue("camera_outer_left_config", + QString::fromStdString(Settings::values.camera_config[OuterLeftCamera])); + qt_config->endGroup(); + qt_config->beginGroup("Data Storage"); qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); qt_config->endGroup(); diff --git a/src/citra_qt/configure_input.cpp b/src/citra_qt/configure_input.cpp index 3e6803b8a..c29652f32 100644 --- a/src/citra_qt/configure_input.cpp +++ b/src/citra_qt/configure_input.cpp @@ -17,7 +17,6 @@ static QString getKeyName(Qt::Key key_code) { case Qt::Key_Alt: return QObject::tr("Alt"); case Qt::Key_Meta: - case -1: return ""; default: return QKeySequence(key_code).toString(); diff --git a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp index 
f5a2ec761..ee79f0edf 100644 --- a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp @@ -20,13 +20,14 @@ #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" +#include "video_core/texture/texture_decode.h" namespace { -QImage LoadTexture(const u8* src, const Pica::DebugUtils::TextureInfo& info) { +QImage LoadTexture(const u8* src, const Pica::Texture::TextureInfo& info) { QImage decoded_image(info.width, info.height, QImage::Format_ARGB32); for (int y = 0; y < info.height; ++y) { for (int x = 0; x < info.width; ++x) { - Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info, true); + Math::Vec4<u8> color = Pica::Texture::LookupTexture(src, x, y, info, true); decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); } } @@ -36,9 +37,10 @@ QImage LoadTexture(const u8* src, const Pica::DebugUtils::TextureInfo& info) { class TextureInfoWidget : public QWidget { public: - TextureInfoWidget(const u8* src, const Pica::DebugUtils::TextureInfo& info, + TextureInfoWidget(const u8* src, const Pica::Texture::TextureInfo& info, QWidget* parent = nullptr) : QWidget(parent) { + QLabel* image_widget = new QLabel; QPixmap image_pixmap = QPixmap::fromImage(LoadTexture(src, info)); image_pixmap = image_pixmap.scaled(200, 100, Qt::KeepAspectRatio, Qt::SmoothTransformation); @@ -160,7 +162,7 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { const auto config = texture.config; const auto format = texture.format; - const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); + const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config, format); const u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); new_info_widget = new TextureInfoWidget(src, info); } diff --git a/src/citra_qt/debugger/graphics/graphics_surface.cpp 
b/src/citra_qt/debugger/graphics/graphics_surface.cpp index 4efd95d3c..bd82b00d4 100644 --- a/src/citra_qt/debugger/graphics/graphics_surface.cpp +++ b/src/citra_qt/debugger/graphics/graphics_surface.cpp @@ -18,6 +18,7 @@ #include "core/memory.h" #include "video_core/pica.h" #include "video_core/pica_state.h" +#include "video_core/texture/texture_decode.h" #include "video_core/utils.h" SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_) @@ -512,7 +513,7 @@ void GraphicsSurfaceWidget::OnUpdate() { } const auto texture = Pica::g_state.regs.GetTextures()[texture_index]; - auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); surface_address = info.physical_address; surface_width = info.width; @@ -567,28 +568,27 @@ void GraphicsSurfaceWidget::OnUpdate() { surface_picture_label->show(); - unsigned nibbles_per_pixel = GraphicsSurfaceWidget::NibblesPerPixel(surface_format); - unsigned stride = nibbles_per_pixel * surface_width / 2; - - // We handle depth formats here because DebugUtils only supports TextureFormats if (surface_format <= Format::MaxTextureFormat) { - // Generate a virtual texture - Pica::DebugUtils::TextureInfo info; + Pica::Texture::TextureInfo info; info.physical_address = surface_address; info.width = surface_width; info.height = surface_height; info.format = static_cast<Pica::Regs::TextureFormat>(surface_format); - info.stride = stride; + info.SetDefaultStride(); for (unsigned int y = 0; y < surface_height; ++y) { for (unsigned int x = 0; x < surface_width; ++x) { - Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(buffer, x, y, info, true); + Math::Vec4<u8> color = Pica::Texture::LookupTexture(buffer, x, y, info, true); decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); } } - } else { + // We handle depth formats here because DebugUtils only supports 
TextureFormats + + // TODO(yuriks): Convert to newer tile-based addressing + unsigned nibbles_per_pixel = GraphicsSurfaceWidget::NibblesPerPixel(surface_format); + unsigned stride = nibbles_per_pixel * surface_width / 2; ASSERT_MSG(nibbles_per_pixel >= 2, "Depth decoder only supports formats with at least one byte per pixel"); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index a7a4a688c..592911c2b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -61,14 +61,11 @@ set(HEADERS if(ARCHITECTURE_x86_64) set(SRCS ${SRCS} - x64/abi.cpp x64/cpu_detect.cpp - x64/emitter.cpp) + ) set(HEADERS ${HEADERS} - x64/abi.h x64/cpu_detect.h - x64/emitter.h x64/xbyak_abi.h x64/xbyak_util.h ) diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 1a1f5d9b5..df234c225 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -303,7 +303,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) { // copy loop while (!feof(input)) { // read input - int rnum = fread(buffer, sizeof(char), BSIZE, input); + size_t rnum = fread(buffer, sizeof(char), BSIZE, input); if (rnum != BSIZE) { if (ferror(input) != 0) { LOG_ERROR(Common_Filesystem, "failed reading from source, %s --> %s: %s", @@ -313,7 +313,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) { } // write output - int wnum = fwrite(buffer, sizeof(char), rnum, output); + size_t wnum = fwrite(buffer, sizeof(char), rnum, output); if (wnum != rnum) { LOG_ERROR(Common_Filesystem, "failed writing to output, %s --> %s: %s", srcFilename.c_str(), destFilename.c_str(), GetLastErrorMsg()); diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp deleted file mode 100644 index 504b9c940..000000000 --- a/src/common/x64/abi.cpp +++ /dev/null @@ -1,350 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. 
- -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include "abi.h" -#include "emitter.h" - -using namespace Gen; - -// Shared code between Win64 and Unix64 - -void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, - size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { - size_t shadow = 0; -#if defined(_WIN32) - shadow = 0x20; -#endif - - int count = (mask & ABI_ALL_GPRS).Count(); - rsp_alignment -= count * 8; - size_t subtraction = 0; - int fpr_count = (mask & ABI_ALL_FPRS).Count(); - if (fpr_count) { - // If we have any XMMs to save, we must align the stack here. - subtraction = rsp_alignment & 0xf; - } - subtraction += 16 * fpr_count; - size_t xmm_base_subtraction = subtraction; - subtraction += needed_frame_size; - subtraction += shadow; - // Final alignment. 
- rsp_alignment -= subtraction; - subtraction += rsp_alignment & 0xf; - - *shadowp = shadow; - *subtractionp = subtraction; - *xmm_offsetp = subtraction - xmm_base_subtraction; -} - -size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, - size_t needed_frame_size) { - size_t shadow, subtraction, xmm_offset; - ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, - &xmm_offset); - - for (int r : mask& ABI_ALL_GPRS) - PUSH((X64Reg)r); - - if (subtraction) - SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); - - for (int x : mask& ABI_ALL_FPRS) { - MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16)); - xmm_offset += 16; - } - - return shadow; -} - -void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, - size_t needed_frame_size) { - size_t shadow, subtraction, xmm_offset; - ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, - &xmm_offset); - - for (int x : mask& ABI_ALL_FPRS) { - MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset)); - xmm_offset += 16; - } - - if (subtraction) - ADD(64, R(RSP), subtraction >= 0x80 ? 
Imm32((u32)subtraction) : Imm8((u8)subtraction)); - - for (int r = 15; r >= 0; r--) { - if (mask[r]) - POP((X64Reg)r); - } -} - -// Common functions -void XEmitter::ABI_CallFunction(const void* func) { - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) { - MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) { - MOV(32, R(ABI_PARAM1), 
Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(64, R(ABI_PARAM3), ImmPtr(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, - void* param4) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - MOV(64, R(ABI_PARAM4), ImmPtr(param4)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionP(const void* func, void* param1) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } 
else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2, - const Gen::OpArg& arg3) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - if (!arg3.IsSimpleReg(ABI_PARAM3)) - MOV(32, R(ABI_PARAM3), arg3); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - MOV(64, R(ABI_PARAM2), ImmPtr(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -// Pass a register as a parameter. -void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) { - if (reg1 != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R(reg1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -// Pass two registers as parameters. 
-void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) { - if (reg2 != ABI_PARAM1) { - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - } else { - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - } - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) { - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2, - u32 param3) { - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(64, R(ABI_PARAM3), Imm64(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) { - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1, - const Gen::OpArg& arg2) { - if 
(!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -}
\ No newline at end of file diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h deleted file mode 100644 index eaaf81d89..000000000 --- a/src/common/x64/abi.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "common/bit_set.h" -#include "emitter.h" - -// x64 ABI:s, and helpers to help follow them when JIT-ing code. -// All convensions return values in EAX (+ possibly EDX). - -// Windows 64-bit -// * 4-reg "fastcall" variant, very new-skool stack handling -// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself -// calls_ -// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. -// Scratch: RAX RCX RDX R8 R9 R10 R11 -// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15 -// Parameters: RCX RDX R8 R9, further MOV-ed - -// Linux 64-bit -// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed) -// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11 -// Callee-save: RBX RBP R12 R13 R14 R15 -// Parameters: RDI RSI RDX RCX R8 R9 - -#define ABI_ALL_FPRS BitSet32(0xffff0000) -#define ABI_ALL_GPRS BitSet32(0x0000ffff) - -#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention - -#define ABI_PARAM1 RCX -#define ABI_PARAM2 RDX -#define ABI_PARAM3 R8 -#define ABI_PARAM4 R9 - -// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. -#define ABI_ALL_CALLER_SAVED \ - (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \ - XMM4 + 16, XMM5 + 16}) -#else // 64-bit Unix / OS X - -#define ABI_PARAM1 RDI -#define ABI_PARAM2 RSI -#define ABI_PARAM3 RDX -#define ABI_PARAM4 RCX -#define ABI_PARAM5 R8 -#define ABI_PARAM6 R9 - -// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably -// don't actually clobber them. 
-#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS) -#endif // WIN32 - -#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) - -#define ABI_RETURN RAX diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp deleted file mode 100644 index f5930abec..000000000 --- a/src/common/x64/emitter.cpp +++ /dev/null @@ -1,2583 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include <cinttypes> -#include <cstring> -#include "abi.h" -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/memory_util.h" -#include "cpu_detect.h" -#include "emitter.h" - -namespace Gen { - -struct NormalOpDef { - u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; -}; - -// 0xCC is code for invalid combination of immediates -static const NormalOpDef normalops[11] = { - {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD - {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC - - {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB - {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB - - {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND - {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR - - {0x30, 0x31, 0x32, 0x33, 
0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR - {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV - - {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from) - {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP - - {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG -}; - -enum NormalSSEOps { - sseCMP = 0xC2, - sseADD = 0x58, // ADD - sseSUB = 0x5C, // SUB - sseAND = 0x54, // AND - sseANDN = 0x55, // ANDN - sseOR = 0x56, - sseXOR = 0x57, - sseMUL = 0x59, // MUL - sseDIV = 0x5E, // DIV - sseMIN = 0x5D, // MIN - sseMAX = 0x5F, // MAX - sseCOMIS = 0x2F, // COMIS - sseUCOMIS = 0x2E, // UCOMIS - sseSQRT = 0x51, // SQRT - sseRSQRT = 0x52, // RSQRT (NO DOUBLE PRECISION!!!) - sseRCP = 0x53, // RCP - sseMOVAPfromRM = 0x28, // MOVAP from RM - sseMOVAPtoRM = 0x29, // MOVAP to RM - sseMOVUPfromRM = 0x10, // MOVUP from RM - sseMOVUPtoRM = 0x11, // MOVUP to RM - sseMOVLPfromRM = 0x12, - sseMOVLPtoRM = 0x13, - sseMOVHPfromRM = 0x16, - sseMOVHPtoRM = 0x17, - sseMOVHLPS = 0x12, - sseMOVLHPS = 0x16, - sseMOVDQfromRM = 0x6F, - sseMOVDQtoRM = 0x7F, - sseMASKMOVDQU = 0xF7, - sseLDDQU = 0xF0, - sseSHUF = 0xC6, - sseMOVNTDQ = 0xE7, - sseMOVNTP = 0x2B, - sseHADD = 0x7C, -}; - -void XEmitter::SetCodePtr(u8* ptr) { - code = ptr; -} - -const u8* XEmitter::GetCodePtr() const { - return code; -} - -u8* XEmitter::GetWritableCodePtr() { - return code; -} - -void XEmitter::Write8(u8 value) { - *code++ = value; -} - -void XEmitter::Write16(u16 value) { - std::memcpy(code, &value, sizeof(u16)); - code += sizeof(u16); -} - -void XEmitter::Write32(u32 value) { - std::memcpy(code, &value, sizeof(u32)); - code += sizeof(u32); -} - -void XEmitter::Write64(u64 value) { - std::memcpy(code, &value, sizeof(u64)); - code += sizeof(u64); -} - -void XEmitter::ReserveCodeSpace(int bytes) { - for (int i = 0; i < bytes; i++) - *code++ = 0xCC; -} - -const u8* XEmitter::AlignCode4() { - int c = int((u64)code & 3); - if (c) - ReserveCodeSpace(4 - c); - 
return code; -} - -const u8* XEmitter::AlignCode16() { - int c = int((u64)code & 15); - if (c) - ReserveCodeSpace(16 - c); - return code; -} - -const u8* XEmitter::AlignCodePage() { - int c = int((u64)code & 4095); - if (c) - ReserveCodeSpace(4096 - c); - return code; -} - -// This operation modifies flags; check to see the flags are locked. -// If the flags are locked, we should immediately and loudly fail before -// causing a subtle JIT bug. -void XEmitter::CheckFlags() { - ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); -} - -void XEmitter::WriteModRM(int mod, int reg, int rm) { - Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); -} - -void XEmitter::WriteSIB(int scale, int index, int base) { - Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); -} - -void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const { - if (customOp == -1) - customOp = operandReg; -#ifdef ARCHITECTURE_x86_64 - u8 op = 0x40; - // REX.W (whether operation is a 64-bit operation) - if (opBits == 64) - op |= 8; - // REX.R (whether ModR/M reg field refers to R8-R15. - if (customOp & 8) - op |= 4; - // REX.X (whether ModR/M SIB index field refers to R8-R15) - if (indexReg & 8) - op |= 2; - // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) - if (offsetOrBaseReg & 8) - op |= 1; - // Write REX if wr have REX bits to write, or if the operation accesses - // SIL, DIL, BPL, or SPL. - if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || - (opBits == 8 && (customOp & 0x10c) == 4)) { - emit->Write8(op); - // Check the operation doesn't access AH, BH, CH, or DH. 
- DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); - DEBUG_ASSERT((customOp & 0x100) == 0); - } -#else - DEBUG_ASSERT(opBits != 64); - DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1); - DEBUG_ASSERT((indexReg & 8) == 0); - DEBUG_ASSERT((offsetOrBaseReg & 8) == 0); - DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); - DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); -#endif -} - -void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, - int W) const { - int R = !(regOp1 & 8); - int X = !(indexReg & 8); - int B = !(offsetOrBaseReg & 8); - - int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); - - // do we need any VEX fields that only appear in the three-byte form? - if (X == 1 && B == 1 && W == 0 && mmmmm == 1) { - u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp; - emit->Write8(0xC5); - emit->Write8(RvvvvLpp); - } else { - u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; - u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp; - emit->Write8(0xC4); - emit->Write8(RXBmmmmm); - emit->Write8(WvvvvLpp); - } -} - -void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg, - bool warn_64bit_offset) const { - if (_operandReg == INVALID_REG) - _operandReg = (X64Reg)this->operandReg; - int mod = 0; - int ireg = indexReg; - bool SIB = false; - int _offsetOrBaseReg = this->offsetOrBaseReg; - - if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address - { - // Oh, RIP addressing. 
- _offsetOrBaseReg = 5; - emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); -// TODO : add some checks -#ifdef ARCHITECTURE_x86_64 - u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; - s64 distance = (s64)offset - (s64)ripAddr; - ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset, - "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr, - offset); - s32 offs = (s32)distance; - emit->Write32((u32)offs); -#else - emit->Write32((u32)offset); -#endif - return; - } - - if (scale == 0) { - // Oh, no memory, Just a reg. - mod = 3; // 11 - } else if (scale >= 1) { - // Ah good, no scaling. - if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) { - // Okay, we're good. No SIB necessary. - int ioff = (int)offset; - if (ioff == 0) { - mod = 0; - } else if (ioff < -128 || ioff > 127) { - mod = 2; // 32-bit displacement - } else { - mod = 1; // 8-bit displacement - } - } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) { - SIB = true; - mod = 0; - _offsetOrBaseReg = 5; - } else // if (scale != SCALE_ATREG) - { - if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :( - { - // So we have to fake it with SIB encoding :( - SIB = true; - } - - if (scale >= SCALE_1 && scale < SCALE_ATREG) { - SIB = true; - } - - if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) { - SIB = true; - ireg = _offsetOrBaseReg; - } - - // Okay, we're fine. Just disp encoding. - // We need displacement. Which size? - int ioff = (int)(s64)offset; - if (ioff < -128 || ioff > 127) { - mod = 2; // 32-bit displacement - } else { - mod = 1; // 8-bit displacement - } - } - } - - // Okay. Time to do the actual writing - // ModRM byte: - int oreg = _offsetOrBaseReg; - if (SIB) - oreg = 4; - - // TODO(ector): WTF is this if about? 
I don't remember writing it :-) - // if (RIP) - // oreg = 5; - - emit->WriteModRM(mod, _operandReg & 7, oreg & 7); - - if (SIB) { - // SIB byte - int ss; - switch (scale) { - case SCALE_NONE: - _offsetOrBaseReg = 4; - ss = 0; - break; // RSP - case SCALE_1: - ss = 0; - break; - case SCALE_2: - ss = 1; - break; - case SCALE_4: - ss = 2; - break; - case SCALE_8: - ss = 3; - break; - case SCALE_NOBASE_2: - ss = 1; - break; - case SCALE_NOBASE_4: - ss = 2; - break; - case SCALE_NOBASE_8: - ss = 3; - break; - case SCALE_ATREG: - ss = 0; - break; - default: - ASSERT_MSG(0, "Invalid scale for SIB byte"); - ss = 0; - break; - } - emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7))); - } - - if (mod == 1) // 8-bit disp - { - emit->Write8((u8)(s8)(s32)offset); - } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp - { - emit->Write32((u32)offset); - } -} - -// W = operand extended width (1 if 64-bit) -// R = register# upper bit -// X = scale amnt upper bit -// B = base register# upper bit -void XEmitter::Rex(int w, int r, int x, int b) { - w = w ? 1 : 0; - r = r ? 1 : 0; - x = x ? 1 : 0; - b = b ? 
1 : 0; - u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); - if (rx != 0x40) - Write8(rx); -} - -void XEmitter::JMP(const u8* addr, bool force5Bytes) { - u64 fn = (u64)addr; - if (!force5Bytes) { - s64 distance = (s64)(fn - ((u64)code + 2)); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, - "Jump target too far away, needs force5Bytes = true"); - // 8 bits will do - Write8(0xEB); - Write8((u8)(s8)distance); - } else { - s64 distance = (s64)(fn - ((u64)code + 5)); - - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); - Write8(0xE9); - Write32((u32)(s32)distance); - } -} - -void XEmitter::JMPptr(const OpArg& arg2) { - OpArg arg = arg2; - if (arg.IsImm()) - ASSERT_MSG(0, "JMPptr - Imm argument"); - arg.operandReg = 4; - arg.WriteRex(this, 0, 0); - Write8(0xFF); - arg.WriteRest(this); -} - -// Can be used to trap other processors, before overwriting their code -// not used in dolphin -void XEmitter::JMPself() { - Write8(0xEB); - Write8(0xFE); -} - -void XEmitter::CALLptr(OpArg arg) { - if (arg.IsImm()) - ASSERT_MSG(0, "CALLptr - Imm argument"); - arg.operandReg = 2; - arg.WriteRex(this, 0, 0); - Write8(0xFF); - arg.WriteRest(this); -} - -void XEmitter::CALL(const void* fnptr) { - u64 distance = u64(fnptr) - (u64(code) + 5); - ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL, - "CALL out of range (%p calls %p)", code, fnptr); - Write8(0xE8); - Write32(u32(distance)); -} - -FixupBranch XEmitter::CALL() { - FixupBranch branch; - branch.type = 1; - branch.ptr = code + 5; - - Write8(0xE8); - Write32(0); - - return branch; -} - -FixupBranch XEmitter::J(bool force5bytes) { - FixupBranch branch; - branch.type = force5bytes ? 1 : 0; - branch.ptr = code + (force5bytes ? 
5 : 2); - if (!force5bytes) { - // 8 bits will do - Write8(0xEB); - Write8(0); - } else { - Write8(0xE9); - Write32(0); - } - return branch; -} - -FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) { - FixupBranch branch; - branch.type = force5bytes ? 1 : 0; - branch.ptr = code + (force5bytes ? 6 : 2); - if (!force5bytes) { - // 8 bits will do - Write8(0x70 + conditionCode); - Write8(0); - } else { - Write8(0x0F); - Write8(0x80 + conditionCode); - Write32(0); - } - return branch; -} - -void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) { - u64 fn = (u64)addr; - s64 distance = (s64)(fn - ((u64)code + 2)); - if (distance < -0x80 || distance >= 0x80 || force5bytes) { - distance = (s64)(fn - ((u64)code + 6)); - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); - Write8(0x0F); - Write8(0x80 + conditionCode); - Write32((u32)(s32)distance); - } else { - Write8(0x70 + conditionCode); - Write8((u8)(s8)distance); - } -} - -void XEmitter::SetJumpTarget(const FixupBranch& branch) { - if (branch.type == 0) { - s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, - "Jump target too far away, needs force5Bytes = true"); - branch.ptr[-1] = (u8)(s8)distance; - } else if (branch.type == 1) { - s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); - ((s32*)branch.ptr)[-1] = (s32)distance; - } -} - -void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { - if (branch.type == 0) { - s64 distance = (s64)(target - branch.ptr); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, - "Jump target too far away, needs force5Bytes = true"); - branch.ptr[-1] = (u8)(s8)distance; - } else if (branch.type == 1) { - s64 distance = (s64)(target - branch.ptr); - ASSERT_MSG(distance >= -0x80000000LL && distance < 
0x80000000LL, - "Jump target too far away, needs indirect register"); - ((s32*)branch.ptr)[-1] = (s32)distance; - } -} - -// Single byte opcodes -// There is no PUSHAD/POPAD in 64-bit mode. -void XEmitter::INT3() { - Write8(0xCC); -} -void XEmitter::RET() { - Write8(0xC3); -} -void XEmitter::RET_FAST() { - Write8(0xF3); - Write8(0xC3); -} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a - // ret - -// The first sign of decadence: optimized NOPs. -void XEmitter::NOP(size_t size) { - DEBUG_ASSERT((int)size > 0); - while (true) { - switch (size) { - case 0: - return; - case 1: - Write8(0x90); - return; - case 2: - Write8(0x66); - Write8(0x90); - return; - case 3: - Write8(0x0F); - Write8(0x1F); - Write8(0x00); - return; - case 4: - Write8(0x0F); - Write8(0x1F); - Write8(0x40); - Write8(0x00); - return; - case 5: - Write8(0x0F); - Write8(0x1F); - Write8(0x44); - Write8(0x00); - Write8(0x00); - return; - case 6: - Write8(0x66); - Write8(0x0F); - Write8(0x1F); - Write8(0x44); - Write8(0x00); - Write8(0x00); - return; - case 7: - Write8(0x0F); - Write8(0x1F); - Write8(0x80); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - return; - case 8: - Write8(0x0F); - Write8(0x1F); - Write8(0x84); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - return; - case 9: - Write8(0x66); - Write8(0x0F); - Write8(0x1F); - Write8(0x84); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - return; - case 10: - Write8(0x66); - Write8(0x66); - Write8(0x0F); - Write8(0x1F); - Write8(0x84); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - return; - default: - // Even though x86 instructions are allowed to be up to 15 bytes long, - // AMD advises against using NOPs longer than 11 bytes because they - // carry a performance penalty on CPUs older than AMD family 16h. 
- Write8(0x66); - Write8(0x66); - Write8(0x66); - Write8(0x0F); - Write8(0x1F); - Write8(0x84); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - Write8(0x00); - size -= 11; - continue; - } - } -} - -void XEmitter::PAUSE() { - Write8(0xF3); - NOP(); -} // use in tight spinloops for energy saving on some cpu -void XEmitter::CLC() { - CheckFlags(); - Write8(0xF8); -} // clear carry -void XEmitter::CMC() { - CheckFlags(); - Write8(0xF5); -} // flip carry -void XEmitter::STC() { - CheckFlags(); - Write8(0xF9); -} // set carry - -// TODO: xchg ah, al ??? -void XEmitter::XCHG_AHAL() { - Write8(0x86); - Write8(0xe0); - // alt. 86 c4 -} - -// These two can not be executed on early Intel 64-bit CPU:s, only on AMD! -void XEmitter::LAHF() { - Write8(0x9F); -} -void XEmitter::SAHF() { - CheckFlags(); - Write8(0x9E); -} - -void XEmitter::PUSHF() { - Write8(0x9C); -} -void XEmitter::POPF() { - CheckFlags(); - Write8(0x9D); -} - -void XEmitter::LFENCE() { - Write8(0x0F); - Write8(0xAE); - Write8(0xE8); -} -void XEmitter::MFENCE() { - Write8(0x0F); - Write8(0xAE); - Write8(0xF0); -} -void XEmitter::SFENCE() { - Write8(0x0F); - Write8(0xAE); - Write8(0xF8); -} - -void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) { - if (bits == 16) - Write8(0x66); - Rex(bits == 64, 0, 0, (int)reg >> 3); - Write8(byte + ((int)reg & 7)); -} - -void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) { - if (bits == 16) - Write8(0x66); - Rex(bits == 64, 0, 0, (int)reg >> 3); - Write8(byte1); - Write8(byte2 + ((int)reg & 7)); -} - -void XEmitter::CWD(int bits) { - if (bits == 16) - Write8(0x66); - Rex(bits == 64, 0, 0, 0); - Write8(0x99); -} - -void XEmitter::CBW(int bits) { - if (bits == 8) - Write8(0x66); - Rex(bits == 32, 0, 0, 0); - Write8(0x98); -} - -// Simple opcodes - -// push/pop do not need wide to be 64-bit -void XEmitter::PUSH(X64Reg reg) { - WriteSimple1Byte(32, 0x50, reg); -} -void XEmitter::POP(X64Reg reg) { - WriteSimple1Byte(32, 0x58, reg); 
-} - -void XEmitter::PUSH(int bits, const OpArg& reg) { - if (reg.IsSimpleReg()) - PUSH(reg.GetSimpleReg()); - else if (reg.IsImm()) { - switch (reg.GetImmBits()) { - case 8: - Write8(0x6A); - Write8((u8)(s8)reg.offset); - break; - case 16: - Write8(0x66); - Write8(0x68); - Write16((u16)(s16)(s32)reg.offset); - break; - case 32: - Write8(0x68); - Write32((u32)reg.offset); - break; - default: - ASSERT_MSG(0, "PUSH - Bad imm bits"); - break; - } - } else { - if (bits == 16) - Write8(0x66); - reg.WriteRex(this, bits, bits); - Write8(0xFF); - reg.WriteRest(this, 0, (X64Reg)6); - } -} - -void XEmitter::POP(int /*bits*/, const OpArg& reg) { - if (reg.IsSimpleReg()) - POP(reg.GetSimpleReg()); - else - ASSERT_MSG(0, "POP - Unsupported encoding"); -} - -void XEmitter::BSWAP(int bits, X64Reg reg) { - if (bits >= 32) { - WriteSimple2Byte(bits, 0x0F, 0xC8, reg); - } else if (bits == 16) { - ROL(16, R(reg), Imm8(8)); - } else if (bits == 8) { - // Do nothing - can't bswap a single byte... - } else { - ASSERT_MSG(0, "BSWAP - Wrong number of bits"); - } -} - -// Undefined opcode - reserved -// If we ever need a way to always cause a non-breakpoint hard exception... 
-void XEmitter::UD2() { - Write8(0x0F); - Write8(0x0B); -} - -void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) { - ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); - arg.operandReg = (u8)level; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - Write8(0x18); - arg.WriteRest(this); -} - -void XEmitter::SETcc(CCFlags flag, OpArg dest) { - ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); - dest.operandReg = 0; - dest.WriteRex(this, 0, 8); - Write8(0x0F); - Write8(0x90 + (u8)flag); - dest.WriteRest(this); -} - -void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { - ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); - ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); - if (bits == 16) - Write8(0x66); - src.operandReg = dest; - src.WriteRex(this, bits, bits); - Write8(0x0F); - Write8(0x40 + (u8)flag); - src.WriteRest(this); -} - -void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) { - ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); - CheckFlags(); - src.operandReg = ext; - if (bits == 16) - Write8(0x66); - src.WriteRex(this, bits, bits, 0); - if (bits == 8) { - Write8(0xF6); - } else { - Write8(0xF7); - } - src.WriteRest(this); -} - -void XEmitter::MUL(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 4); -} -void XEmitter::DIV(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 6); -} -void XEmitter::IMUL(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 5); -} -void XEmitter::IDIV(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 7); -} -void XEmitter::NEG(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 3); -} -void XEmitter::NOT(int bits, const OpArg& src) { - WriteMulDivType(bits, src, 2); -} - -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { - ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument"); - CheckFlags(); - src.operandReg = (u8)dest; - if (bits == 16) - Write8(0x66); - if (rep) - Write8(0xF3); - src.WriteRex(this, 
bits, bits); - Write8(0x0F); - Write8(byte2); - src.WriteRest(this); -} - -void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) { - if (bits <= 16) - ASSERT_MSG(0, "MOVNTI - bits<=16"); - WriteBitSearchType(bits, src, dest, 0xC3); -} - -void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) { - WriteBitSearchType(bits, dest, src, 0xBC); -} // Bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) { - WriteBitSearchType(bits, dest, src, 0xBD); -} // Top bit to bottom bit - -void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) { - CheckFlags(); - if (!Common::GetCPUCaps().bmi1) - ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); - WriteBitSearchType(bits, dest, src, 0xBC, true); -} -void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) { - CheckFlags(); - if (!Common::GetCPUCaps().lzcnt) - ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer."); - WriteBitSearchType(bits, dest, src, 0xBD, true); -} - -void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { - ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); - if (dbits == sbits) { - MOV(dbits, R(dest), src); - return; - } - src.operandReg = (u8)dest; - if (dbits == 16) - Write8(0x66); - src.WriteRex(this, dbits, sbits); - if (sbits == 8) { - Write8(0x0F); - Write8(0xBE); - } else if (sbits == 16) { - Write8(0x0F); - Write8(0xBF); - } else if (sbits == 32 && dbits == 64) { - Write8(0x63); - } else { - Crash(); - } - src.WriteRest(this); -} - -void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { - ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); - if (dbits == sbits) { - MOV(dbits, R(dest), src); - return; - } - src.operandReg = (u8)dest; - if (dbits == 16) - Write8(0x66); - // the 32bit result is automatically zero extended to 64bit - src.WriteRex(this, dbits == 64 ? 
32 : dbits, sbits); - if (sbits == 8) { - Write8(0x0F); - Write8(0xB6); - } else if (sbits == 16) { - Write8(0x0F); - Write8(0xB7); - } else if (sbits == 32 && dbits == 64) { - Write8(0x8B); - } else { - ASSERT_MSG(0, "MOVZX - Invalid size"); - } - src.WriteRest(this); -} - -void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) { - ASSERT_MSG(Common::GetCPUCaps().movbe, - "Generating MOVBE on a system that does not support it."); - if (bits == 8) { - MOV(bits, dest, src); - return; - } - - if (bits == 16) - Write8(0x66); - - if (dest.IsSimpleReg()) { - ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); - src.WriteRex(this, bits, bits, dest.GetSimpleReg()); - Write8(0x0F); - Write8(0x38); - Write8(0xF0); - src.WriteRest(this, 0, dest.GetSimpleReg()); - } else if (src.IsSimpleReg()) { - ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); - dest.WriteRex(this, bits, bits, src.GetSimpleReg()); - Write8(0x0F); - Write8(0x38); - Write8(0xF1); - dest.WriteRest(this, 0, src.GetSimpleReg()); - } else { - ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); - } -} - -void XEmitter::LEA(int bits, X64Reg dest, OpArg src) { - ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); - src.operandReg = (u8)dest; - if (bits == 16) - Write8(0x66); // TODO: performance warning - src.WriteRex(this, bits, bits); - Write8(0x8D); - src.WriteRest(this, 0, INVALID_REG, bits == 64); -} - -// shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) { - CheckFlags(); - bool writeImm = false; - if (dest.IsImm()) { - ASSERT_MSG(0, "WriteShift - can't shift imms"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || - (shift.IsImm() && shift.GetImmBits() != 8)) { - ASSERT_MSG(0, "WriteShift - illegal argument"); - } - dest.operandReg = ext; - if (bits == 16) - Write8(0x66); - dest.WriteRex(this, bits, bits, 0); - if (shift.GetImmBits() == 8) { - // ok an imm - u8 imm = 
(u8)shift.offset; - if (imm == 1) { - Write8(bits == 8 ? 0xD0 : 0xD1); - } else { - writeImm = true; - Write8(bits == 8 ? 0xC0 : 0xC1); - } - } else { - Write8(bits == 8 ? 0xD2 : 0xD3); - } - dest.WriteRest(this, writeImm ? 1 : 0); - if (writeImm) - Write8((u8)shift.offset); -} - -// large rotates and shift are slower on intel than amd -// intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 0); -} -void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 1); -} -void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 2); -} -void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 3); -} -void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 4); -} -void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 5); -} -void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) { - WriteShift(bits, dest, shift, 7); -} - -// index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) { - CheckFlags(); - if (dest.IsImm()) { - ASSERT_MSG(0, "WriteBitTest - can't test imms"); - } - if ((index.IsImm() && index.GetImmBits() != 8)) { - ASSERT_MSG(0, "WriteBitTest - illegal argument"); - } - if (bits == 16) - Write8(0x66); - if (index.IsImm()) { - dest.WriteRex(this, bits, bits); - Write8(0x0F); - Write8(0xBA); - dest.WriteRest(this, 1, (X64Reg)ext); - Write8((u8)index.offset); - } else { - X64Reg operand = index.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - Write8(0x0F); - Write8(0x83 + 8 * ext); - dest.WriteRest(this, 1, operand); - } -} - -void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) { - 
WriteBitTest(bits, dest, index, 4); -} -void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) { - WriteBitTest(bits, dest, index, 5); -} -void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) { - WriteBitTest(bits, dest, index, 6); -} -void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) { - WriteBitTest(bits, dest, index, 7); -} - -// shift can be either imm8 or cl -void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { - CheckFlags(); - if (dest.IsImm()) { - ASSERT_MSG(0, "SHRD - can't use imms as destination"); - } - if (!src.IsSimpleReg()) { - ASSERT_MSG(0, "SHRD - must use simple register as source"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || - (shift.IsImm() && shift.GetImmBits() != 8)) { - ASSERT_MSG(0, "SHRD - illegal shift"); - } - if (bits == 16) - Write8(0x66); - X64Reg operand = src.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) { - Write8(0x0F); - Write8(0xAC); - dest.WriteRest(this, 1, operand); - Write8((u8)shift.offset); - } else { - Write8(0x0F); - Write8(0xAD); - dest.WriteRest(this, 0, operand); - } -} - -void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { - CheckFlags(); - if (dest.IsImm()) { - ASSERT_MSG(0, "SHLD - can't use imms as destination"); - } - if (!src.IsSimpleReg()) { - ASSERT_MSG(0, "SHLD - must use simple register as source"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || - (shift.IsImm() && shift.GetImmBits() != 8)) { - ASSERT_MSG(0, "SHLD - illegal shift"); - } - if (bits == 16) - Write8(0x66); - X64Reg operand = src.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) { - Write8(0x0F); - Write8(0xA4); - dest.WriteRest(this, 1, operand); - Write8((u8)shift.offset); - } else { - Write8(0x0F); - Write8(0xA5); - dest.WriteRest(this, 0, operand); - } -} - -void OpArg::WriteSingleByteOp(XEmitter* 
emit, u8 op, X64Reg _operandReg, int bits) { - if (bits == 16) - emit->Write8(0x66); - - this->operandReg = (u8)_operandReg; - WriteRex(emit, bits, bits); - emit->Write8(op); - WriteRest(emit); -} - -// operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, - int bits) const { - X64Reg _operandReg; - if (IsImm()) { - ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); - } - - if (bits == 16) - emit->Write8(0x66); - - int immToWrite = 0; - - if (operand.IsImm()) { - WriteRex(emit, bits, bits); - - if (!toRM) { - ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); - } - - if (operand.scale == SCALE_IMM8 && bits == 8) { - // op al, imm8 - if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) { - emit->Write8(normalops[op].eaximm8); - emit->Write8((u8)operand.offset); - return; - } - // mov reg, imm8 - if (!scale && op == nrmMOV) { - emit->Write8(0xB0 + (offsetOrBaseReg & 7)); - emit->Write8((u8)operand.offset); - return; - } - // op r/m8, imm8 - emit->Write8(normalops[op].imm8); - immToWrite = 8; - } else if ((operand.scale == SCALE_IMM16 && bits == 16) || - (operand.scale == SCALE_IMM32 && bits == 32) || - (operand.scale == SCALE_IMM32 && bits == 64)) { - // Try to save immediate size if we can, but first check to see - // if the instruction supports simm8. 
- // op r/m, imm8 - if (normalops[op].simm8 != 0xCC && - ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || - (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) { - emit->Write8(normalops[op].simm8); - immToWrite = 8; - } else { - // mov reg, imm - if (!scale && op == nrmMOV && bits != 64) { - emit->Write8(0xB8 + (offsetOrBaseReg & 7)); - if (bits == 16) - emit->Write16((u16)operand.offset); - else - emit->Write32((u32)operand.offset); - return; - } - // op eax, imm - if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) { - emit->Write8(normalops[op].eaximm32); - if (bits == 16) - emit->Write16((u16)operand.offset); - else - emit->Write32((u32)operand.offset); - return; - } - // op r/m, imm - emit->Write8(normalops[op].imm32); - immToWrite = bits == 16 ? 16 : 32; - } - } else if ((operand.scale == SCALE_IMM8 && bits == 16) || - (operand.scale == SCALE_IMM8 && bits == 32) || - (operand.scale == SCALE_IMM8 && bits == 64)) { - // op r/m, imm8 - emit->Write8(normalops[op].simm8); - immToWrite = 8; - } else if (operand.scale == SCALE_IMM64 && bits == 64) { - if (scale) { - ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); - } - // mov reg64, imm64 - else if (op == nrmMOV) { - emit->Write8(0xB8 + (offsetOrBaseReg & 7)); - emit->Write64((u64)operand.offset); - return; - } - ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); - } else { - ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); - } - _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM - } else { - _operandReg = (X64Reg)operand.offsetOrBaseReg; - WriteRex(emit, bits, bits, _operandReg); - // op r/m, reg - if (toRM) { - emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); - } - // op reg, r/m - else { - emit->Write8(bits == 8 ? 
normalops[op].fromRm8 : normalops[op].fromRm32); - } - } - WriteRest(emit, immToWrite >> 3, _operandReg); - switch (immToWrite) { - case 0: - break; - case 8: - emit->Write8((u8)operand.offset); - break; - case 16: - emit->Write16((u16)operand.offset); - break; - case 32: - emit->Write32((u32)operand.offset); - break; - default: - ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); - } -} - -void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, - const OpArg& a2) { - if (a1.IsImm()) { - // Booh! Can't write to an imm - ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); - return; - } - if (a2.IsImm()) { - a1.WriteNormalOp(emit, true, op, a2, bits); - } else { - if (a1.IsSimpleReg()) { - a2.WriteNormalOp(emit, false, op, a1, bits); - } else { - ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), - "WriteNormalOp - a1 and a2 cannot both be memory"); - a1.WriteNormalOp(emit, true, op, a2, bits); - } - } -} - -void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmADD, a1, a2); -} -void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmADC, a1, a2); -} -void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmSUB, a1, a2); -} -void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmSBB, a1, a2); -} -void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmAND, a1, a2); -} -void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmOR, a1, a2); -} -void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmXOR, a1, a2); -} -void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) { - if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) - 
LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); - WriteNormalOp(this, bits, nrmMOV, a1, a2); -} -void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmTEST, a1, a2); -} -void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - WriteNormalOp(this, bits, nrmCMP, a1, a2); -} -void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) { - WriteNormalOp(this, bits, nrmXCHG, a1, a2); -} - -void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) { - CheckFlags(); - if (bits == 8) { - ASSERT_MSG(0, "IMUL - illegal bit size!"); - return; - } - - if (a1.IsImm()) { - ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); - return; - } - - if (!a2.IsImm()) { - ASSERT_MSG(0, "IMUL - third arg must be imm!"); - return; - } - - if (bits == 16) - Write8(0x66); - a1.WriteRex(this, bits, bits, regOp); - - if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || - (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) { - Write8(0x6B); - a1.WriteRest(this, 1, regOp); - Write8((u8)a2.offset); - } else { - Write8(0x69); - if (a2.GetImmBits() == 16 && bits == 16) { - a1.WriteRest(this, 2, regOp); - Write16((u16)a2.offset); - } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) { - a1.WriteRest(this, 4, regOp); - Write32((u32)a2.offset); - } else { - ASSERT_MSG(0, "IMUL - unhandled case!"); - } - } -} - -void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) { - CheckFlags(); - if (bits == 8) { - ASSERT_MSG(0, "IMUL - illegal bit size!"); - return; - } - - if (a.IsImm()) { - IMUL(bits, regOp, R(regOp), a); - return; - } - - if (bits == 16) - Write8(0x66); - a.WriteRex(this, bits, bits, regOp); - Write8(0x0F); - Write8(0xAF); - a.WriteRest(this, 0, regOp); -} - -void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) { - if (opPrefix) - Write8(opPrefix); - arg.operandReg = regOp; - 
arg.WriteRex(this, 0, 0); - Write8(0x0F); - if (op > 0xFF) - Write8((op >> 8) & 0xFF); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes); -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { - WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); -} - -static int GetVEXmmmmm(u16 op) { - // Currently, only 0x38 and 0x3A are used as secondary escape byte. - if ((op >> 8) == 0x3A) - return 3; - if ((op >> 8) == 0x38) - return 2; - - return 1; -} - -static int GetVEXpp(u8 opPrefix) { - if (opPrefix == 0x66) - return 1; - if (opPrefix == 0xF3) - return 2; - if (opPrefix == 0xF2) - return 3; - - return 0; -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, - int extrabytes) { - if (!Common::GetCPUCaps().avx) - ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); - int mmmmm = GetVEXmmmmm(op); - int pp = GetVEXpp(opPrefix); - // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size - // here - arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes, regOp1); -} - -// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, - const OpArg& arg, int extrabytes) { - if (size != 32 && size != 64) - ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); - int mmmmm = GetVEXmmmmm(op); - int pp = GetVEXpp(opPrefix); - arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes, regOp1); -} - -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, - const OpArg& arg, int extrabytes) { - CheckFlags(); - if (!Common::GetCPUCaps().bmi1) - ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. 
Bad programmer."); - WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, - const OpArg& arg, int extrabytes) { - CheckFlags(); - if (!Common::GetCPUCaps().bmi2) - ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer."); - WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x6E, dest, arg, 0); -} -void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) { - WriteSSEOp(0x66, 0x7E, src, arg, 0); -} - -void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) { -#ifdef ARCHITECTURE_x86_64 - // Alternate encoding - // This does not display correctly in MSVC's debugger, it thinks it's a MOVD - arg.operandReg = dest; - Write8(0x66); - arg.WriteRex(this, 64, 0); - Write8(0x0f); - Write8(0x6E); - arg.WriteRest(this, 0); -#else - arg.operandReg = dest; - Write8(0xF3); - Write8(0x0f); - Write8(0x7E); - arg.WriteRest(this, 0); -#endif -} - -void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { - if (src > 7 || arg.IsSimpleReg()) { - // Alternate encoding - // This does not display correctly in MSVC's debugger, it thinks it's a MOVD - arg.operandReg = src; - Write8(0x66); - arg.WriteRex(this, 64, 0); - Write8(0x0f); - Write8(0x7E); - arg.WriteRest(this, 0); - } else { - arg.operandReg = src; - arg.WriteRex(this, 0, 0); - Write8(0x66); - Write8(0x0f); - Write8(0xD6); - arg.WriteRest(this, 0); - } -} - -void XEmitter::WriteMXCSR(OpArg arg, int ext) { - if (arg.IsImm() || arg.IsSimpleReg()) - ASSERT_MSG(0, "MXCSR - invalid operand"); - - arg.operandReg = ext; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - Write8(0xAE); - arg.WriteRest(this); -} - -void XEmitter::STMXCSR(const OpArg& memloc) { - WriteMXCSR(memloc, 3); -} -void XEmitter::LDMXCSR(const OpArg& memloc) { - WriteMXCSR(memloc, 2); -} - -void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) { - 
WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg); -} -void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x00, sseMOVNTP, regOp, arg); -} -void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x66, sseMOVNTP, regOp, arg); -} - -void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseADD, regOp, arg); -} -void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseADD, regOp, arg); -} -void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseSUB, regOp, arg); -} -void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseSUB, regOp, arg); -} -void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) { - WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); - Write8(compare); -} -void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) { - WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); - Write8(compare); -} -void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseMUL, regOp, arg); -} -void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseMUL, regOp, arg); -} -void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseDIV, regOp, arg); -} -void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseDIV, regOp, arg); -} -void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseMIN, regOp, arg); -} -void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseMIN, regOp, arg); -} -void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseMAX, regOp, arg); -} -void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseMAX, regOp, arg); -} -void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseSQRT, regOp, arg); -} -void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseSQRT, regOp, arg); -} -void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseRCP, 
regOp, arg); -} -void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseRSQRT, regOp, arg); -} - -void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseADD, regOp, arg); -} -void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseADD, regOp, arg); -} -void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseSUB, regOp, arg); -} -void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseSUB, regOp, arg); -} -void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) { - WriteSSEOp(0x00, sseCMP, regOp, arg, 1); - Write8(compare); -} -void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) { - WriteSSEOp(0x66, sseCMP, regOp, arg, 1); - Write8(compare); -} -void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseAND, regOp, arg); -} -void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseAND, regOp, arg); -} -void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseANDN, regOp, arg); -} -void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseANDN, regOp, arg); -} -void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseOR, regOp, arg); -} -void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseOR, regOp, arg); -} -void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseXOR, regOp, arg); -} -void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseXOR, regOp, arg); -} -void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMUL, regOp, arg); -} -void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMUL, regOp, arg); -} -void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseDIV, regOp, arg); -} -void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseDIV, regOp, arg); -} -void XEmitter::MINPS(X64Reg 
regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMIN, regOp, arg); -} -void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMIN, regOp, arg); -} -void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMAX, regOp, arg); -} -void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMAX, regOp, arg); -} -void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseSQRT, regOp, arg); -} -void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseSQRT, regOp, arg); -} -void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseRCP, regOp, arg); -} -void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseRSQRT, regOp, arg); -} -void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) { - WriteSSEOp(0x00, sseSHUF, regOp, arg, 1); - Write8(shuffle); -} -void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) { - WriteSSEOp(0x66, sseSHUF, regOp, arg, 1); - Write8(shuffle); -} - -void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseHADD, regOp, arg); -} - -void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseCOMIS, regOp, arg); -} // weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseCOMIS, regOp, arg); -} // ordered -void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseUCOMIS, regOp, arg); -} // unordered -void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseUCOMIS, regOp, arg); -} - -void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg); -} -void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg); -} -void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg); -} -void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) { 
- WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg); -} - -void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg); -} -void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg); -} - -void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg); -} -void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg); -} -void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg); -} -void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg); -} - -void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg); -} -void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg); -} -void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg); -} - -void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); -} -void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); -} -void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); -} -void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); -} - -void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); -} -void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); -} -void 
XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); -} -void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { - WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); -} - -void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) { - WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2)); -} -void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) { - WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2)); -} - -void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, 0x5A, regOp, arg); -} -void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, 0x5A, regOp, arg); -} - -void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, 0x5A, regOp, arg); -} -void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0x5A, regOp, arg); -} -void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, 0x2D, regOp, arg); -} -void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0x2D, regOp, arg); -} -void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, 0x2A, regOp, arg); -} -void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0x2A, regOp, arg); -} - -void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0xE6, regOp, arg); -} -void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x00, 0x5B, regOp, arg); -} -void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, 0xE6, regOp, arg); -} -void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0x66, 0x5B, regOp, arg); -} - -void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF2, 0x2C, regOp, arg); -} -void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0x2C, regOp, arg); -} -void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) { - WriteSSEOp(0xF3, 0x5B, regOp, arg); -} -void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) { 
- WriteSSEOp(0x66, 0xE6, regOp, arg); -} - -void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) { - WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src)); -} - -void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x00, 0x50, dest, arg); -} -void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x50, dest, arg); -} - -void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0xF2, sseLDDQU, dest, arg); -} // For integer data only - -// THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x00, 0x14, dest, arg); -} -void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x00, 0x15, dest, arg); -} - -void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x14, dest, arg); -} -void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x15, dest, arg); -} - -void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { - if (Common::GetCPUCaps().sse3) { - WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup - } else { - // Simulate this instruction with SSE2 instructions - if (!arg.IsSimpleReg(regOp)) - MOVSD(regOp, arg); - UNPCKLPD(regOp, R(regOp)); - } -} - -// There are a few more left - -// Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x6B, dest, arg); -} -void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x63, dest, arg); -} -void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x67, dest, arg); -} - -void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x60, dest, arg); -} -void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x61, dest, arg); -} -void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x62, dest, arg); -} -void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x6C, dest, arg); -} - -void 
XEmitter::PSRLW(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLD(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) { - WriteSSEOp(0x66, 0xd3, reg, arg); -} - -void XEmitter::PSRLDQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLW(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLD(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLDQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg)); - Write8(shift); -} - -void XEmitter::PSRAW(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); - Write8(shift); -} - -void XEmitter::PSRAD(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); - Write8(shift); -} - -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { - if (!Common::GetCPUCaps().ssse3) - ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); - WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { - if (!Common::GetCPUCaps().sse4_1) - ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. 
Bad programmer."); - WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) { - WriteSSSE3Op(0x66, 0x3800, dest, arg); -} -void XEmitter::PTEST(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3817, dest, arg); -} -void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x382b, dest, arg); -} -void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) { - WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); - Write8(mask); -} - -void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3838, dest, arg); -} -void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3839, dest, arg); -} -void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383a, dest, arg); -} -void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383b, dest, arg); -} -void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383c, dest, arg); -} -void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383d, dest, arg); -} -void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383e, dest, arg); -} -void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x383f, dest, arg); -} - -void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3820, dest, arg); -} -void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3821, dest, arg); -} -void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3822, dest, arg); -} -void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3823, dest, arg); -} -void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3824, dest, arg); -} -void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3825, dest, arg); -} -void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 
0x3830, dest, arg); -} -void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3831, dest, arg); -} -void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3832, dest, arg); -} -void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3833, dest, arg); -} -void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3834, dest, arg); -} -void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3835, dest, arg); -} - -void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3810, dest, arg); -} -void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3814, dest, arg); -} -void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) { - WriteSSE41Op(0x66, 0x3815, dest, arg); -} -void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { - WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); - Write8(blend); -} -void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { - WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); - Write8(blend); -} - -void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) { - WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); - Write8(mode); -} -void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) { - WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); - Write8(mode); -} -void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) { - WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); - Write8(mode); -} -void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) { - WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); - Write8(mode); -} - -void XEmitter::PAND(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDB, dest, arg); -} -void XEmitter::PANDN(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDF, dest, arg); -} -void XEmitter::PXOR(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xEF, dest, arg); -} -void XEmitter::POR(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xEB, dest, arg); -} 
- -void XEmitter::PADDB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xFC, dest, arg); -} -void XEmitter::PADDW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xFD, dest, arg); -} -void XEmitter::PADDD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xFE, dest, arg); -} -void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xD4, dest, arg); -} - -void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xEC, dest, arg); -} -void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xED, dest, arg); -} -void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDC, dest, arg); -} -void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDD, dest, arg); -} - -void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xF8, dest, arg); -} -void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xF9, dest, arg); -} -void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xFA, dest, arg); -} -void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xFB, dest, arg); -} - -void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xE8, dest, arg); -} -void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xE9, dest, arg); -} -void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xD8, dest, arg); -} -void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xD9, dest, arg); -} - -void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xE0, dest, arg); -} -void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xE3, dest, arg); -} - -void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x74, dest, arg); -} -void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x75, dest, arg); -} -void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) { - 
WriteSSEOp(0x66, 0x76, dest, arg); -} - -void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x64, dest, arg); -} -void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x65, dest, arg); -} -void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0x66, dest, arg); -} - -void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) { - WriteSSEOp(0x66, 0xC5, dest, arg, 1); - Write8(subreg); -} -void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) { - WriteSSEOp(0x66, 0xC4, dest, arg, 1); - Write8(subreg); -} - -void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xF5, dest, arg); -} -void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xF6, dest, arg); -} - -void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xEE, dest, arg); -} -void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDE, dest, arg); -} -void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xEA, dest, arg); -} -void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xDA, dest, arg); -} - -void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) { - WriteSSEOp(0x66, 0xD7, dest, arg); -} -void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) { - WriteSSEOp(0x66, 0x70, regOp, arg, 1); - Write8(shuffle); -} -void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) { - WriteSSEOp(0xF2, 0x70, regOp, arg, 1); - Write8(shuffle); -} -void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) { - WriteSSEOp(0xF3, 0x70, regOp, arg, 1); - Write8(shuffle); -} - -// VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg); -} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg); -} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) 
{ - WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg); -} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg); -} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg); -} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg); -} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg); -} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg); -} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg); -} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) { - WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); - Write8(shuffle); -} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg); -} -void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg); -} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); -} -void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); -} -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); -} -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); -} -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); -} -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); -} -void 
XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); -} -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); -} - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); -} -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); -} -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); -} -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); -} - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); -} -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); -} -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); -} -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); -} -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); -} -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); -} -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - 
WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); -} -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 
0x389C, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); -} -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389E, regOp1, 
regOp2, arg, 1); -} -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); -} -void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); -} -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); -} -void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); -} -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); -} -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3897, 
regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); -} -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); -} -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); -} - -void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { - WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { - WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { - WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) { - WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); - Write8(rotate); -} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg); -} -void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { - WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg); -} -void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) { - WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg); -} -void XEmitter::BLSMSK(int 
bits, X64Reg regOp, const OpArg& arg) { - WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg); -} -void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) { - WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg); -} -void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { - WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg); -} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { - WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg); -} - -// Prefixes - -void XEmitter::LOCK() { - Write8(0xF0); -} -void XEmitter::REP() { - Write8(0xF3); -} -void XEmitter::REPNE() { - Write8(0xF2); -} -void XEmitter::FSOverride() { - Write8(0x64); -} -void XEmitter::GSOverride() { - Write8(0x65); -} - -void XEmitter::FWAIT() { - Write8(0x9B); -} - -// TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) { - int mf = 0; - ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), - "WriteFloatLoadStore: 80 bits not supported for this instruction"); - switch (bits) { - case 32: - mf = 0; - break; - case 64: - mf = 4; - break; - case 80: - mf = 2; - break; - default: - ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); - } - Write8(0xd9 | mf); - // x87 instructions use the reg field of the ModR/M byte as opcode: - if (bits == 80) - op = op_80b; - arg.WriteRest(this, 0, (X64Reg)op); -} - -void XEmitter::FLD(int bits, const OpArg& src) { - WriteFloatLoadStore(bits, floatLD, floatLD80, src); -} -void XEmitter::FST(int bits, const OpArg& dest) { - WriteFloatLoadStore(bits, floatST, floatINVALID, dest); -} -void XEmitter::FSTP(int bits, const OpArg& dest) { - WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest); -} -void XEmitter::FNSTSW_AX() { - Write8(0xDF); - Write8(0xE0); -} - -void XEmitter::RDTSC() { - Write8(0x0F); - Write8(0x31); -} - -void XCodeBlock::PoisonMemory() { - // x86/64: 0xCC = breakpoint - memset(region, 0xCC, 
region_size); -} -} diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h deleted file mode 100644 index 7d7cdde16..000000000 --- a/src/common/x64/emitter.h +++ /dev/null @@ -1,1206 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#pragma once - -#include <cstddef> -#include "common/assert.h" -#include "common/bit_set.h" -#include "common/code_block.h" -#include "common/common_types.h" - -#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64) -#define _ARCH_64 -#endif - -#ifdef _ARCH_64 -#define PTRBITS 64 -#else -#define PTRBITS 32 -#endif - -namespace Gen { - -enum X64Reg { - EAX = 0, - EBX = 3, - ECX = 1, - EDX = 2, - ESI = 6, - EDI = 7, - EBP = 5, - ESP = 4, - - RAX = 0, - RBX = 3, - RCX = 1, - RDX = 2, - RSI = 6, - RDI = 7, - RBP = 5, - RSP = 4, - R8 = 8, - R9 = 9, - R10 = 10, - R11 = 11, - R12 = 12, - R13 = 13, - R14 = 14, - R15 = 15, - - AL = 0, - BL = 3, - CL = 1, - DL = 2, - SIL = 6, - DIL = 7, - BPL = 5, - SPL = 4, - AH = 0x104, - BH = 0x107, - CH = 0x105, - DH = 0x106, - - AX = 0, - BX = 3, - CX = 1, - DX = 2, - SI = 6, - DI = 7, - BP = 5, - SP = 4, - - XMM0 = 0, - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15, - - YMM0 = 0, - YMM1, - YMM2, - YMM3, - YMM4, - YMM5, - YMM6, - YMM7, - 
YMM8, - YMM9, - YMM10, - YMM11, - YMM12, - YMM13, - YMM14, - YMM15, - - INVALID_REG = 0xFFFFFFFF -}; - -enum CCFlags { - CC_O = 0, - CC_NO = 1, - CC_B = 2, - CC_C = 2, - CC_NAE = 2, - CC_NB = 3, - CC_NC = 3, - CC_AE = 3, - CC_Z = 4, - CC_E = 4, - CC_NZ = 5, - CC_NE = 5, - CC_BE = 6, - CC_NA = 6, - CC_NBE = 7, - CC_A = 7, - CC_S = 8, - CC_NS = 9, - CC_P = 0xA, - CC_PE = 0xA, - CC_NP = 0xB, - CC_PO = 0xB, - CC_L = 0xC, - CC_NGE = 0xC, - CC_NL = 0xD, - CC_GE = 0xD, - CC_LE = 0xE, - CC_NG = 0xE, - CC_NLE = 0xF, - CC_G = 0xF -}; - -enum { - NUMGPRs = 16, - NUMXMMs = 16, -}; - -enum { - SCALE_NONE = 0, - SCALE_1 = 1, - SCALE_2 = 2, - SCALE_4 = 4, - SCALE_8 = 8, - SCALE_ATREG = 16, - // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG - SCALE_NOBASE_2 = 34, - SCALE_NOBASE_4 = 36, - SCALE_NOBASE_8 = 40, - SCALE_RIP = 0xFF, - SCALE_IMM8 = 0xF0, - SCALE_IMM16 = 0xF1, - SCALE_IMM32 = 0xF2, - SCALE_IMM64 = 0xF3, -}; - -enum NormalOp { - nrmADD, - nrmADC, - nrmSUB, - nrmSBB, - nrmAND, - nrmOR, - nrmXOR, - nrmMOV, - nrmTEST, - nrmCMP, - nrmXCHG, -}; - -enum { - CMP_EQ = 0, - CMP_LT = 1, - CMP_LE = 2, - CMP_UNORD = 3, - CMP_NEQ = 4, - CMP_NLT = 5, - CMP_NLE = 6, - CMP_ORD = 7, -}; - -enum FloatOp { - floatLD = 0, - floatST = 2, - floatSTP = 3, - floatLD80 = 5, - floatSTP80 = 7, - - floatINVALID = -1, -}; - -enum FloatRound { - FROUND_NEAREST = 0, - FROUND_FLOOR = 1, - FROUND_CEIL = 2, - FROUND_ZERO = 3, - FROUND_MXCSR = 4, - - FROUND_RAISE_PRECISION = 0, - FROUND_IGNORE_PRECISION = 8, -}; - -class XEmitter; - -// RIP addressing does not benefit from micro op fusion on Core arch -struct OpArg { - friend class XEmitter; - - constexpr OpArg() = default; // dummy op arg, used for storage - constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) - : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)), - indexReg(static_cast<u16>(scaledReg)), offset(offset_) {} - - constexpr bool operator==(const OpArg& b) const { - 
return operandReg == b.operandReg && scale == b.scale && - offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset; - } - - void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const; - void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, - int W = 0) const; - void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG, - bool warn_64bit_offset = true) const; - void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits); - void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, - int bits) const; - - constexpr bool IsImm() const { - return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || - scale == SCALE_IMM64; - } - constexpr bool IsSimpleReg() const { - return scale == SCALE_NONE; - } - constexpr bool IsSimpleReg(X64Reg reg) const { - return IsSimpleReg() && GetSimpleReg() == reg; - } - - int GetImmBits() const { - switch (scale) { - case SCALE_IMM8: - return 8; - case SCALE_IMM16: - return 16; - case SCALE_IMM32: - return 32; - case SCALE_IMM64: - return 64; - default: - return -1; - } - } - - void SetImmBits(int bits) { - switch (bits) { - case 8: - scale = SCALE_IMM8; - break; - case 16: - scale = SCALE_IMM16; - break; - case 32: - scale = SCALE_IMM32; - break; - case 64: - scale = SCALE_IMM64; - break; - } - } - - constexpr X64Reg GetSimpleReg() const { - return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG; - } - - constexpr u32 GetImmValue() const { - return static_cast<u32>(offset); - } - - // For loops. - void IncreaseOffset(int sz) { - offset += sz; - } - -private: - u8 scale = 0; - u16 offsetOrBaseReg = 0; - u16 indexReg = 0; - u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available. 
- u16 operandReg = 0; -}; - -template <typename T> -inline OpArg M(const T* ptr) { - return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); -} -constexpr OpArg R(X64Reg value) { - return OpArg(0, SCALE_NONE, value); -} -constexpr OpArg MatR(X64Reg value) { - return OpArg(0, SCALE_ATREG, value); -} - -constexpr OpArg MDisp(X64Reg value, int offset) { - return OpArg(static_cast<u32>(offset), SCALE_ATREG, value); -} - -constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) { - return OpArg(offset, scale, base, scaled); -} - -constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) { - return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled) - : OpArg(offset, scale | 0x20, RAX, scaled); -} - -constexpr OpArg MRegSum(X64Reg base, X64Reg offset) { - return MComplex(base, offset, 1, 0); -} - -constexpr OpArg Imm8(u8 imm) { - return OpArg(imm, SCALE_IMM8); -} -constexpr OpArg Imm16(u16 imm) { - return OpArg(imm, SCALE_IMM16); -} // rarely used -constexpr OpArg Imm32(u32 imm) { - return OpArg(imm, SCALE_IMM32); -} -constexpr OpArg Imm64(u64 imm) { - return OpArg(imm, SCALE_IMM64); -} -constexpr OpArg UImmAuto(u32 imm) { - return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); -} -constexpr OpArg SImmAuto(s32 imm) { - return OpArg(imm, (imm >= 128 || imm < -128) ? 
SCALE_IMM32 : SCALE_IMM8); -} - -template <typename T> -OpArg ImmPtr(const T* imm) { -#ifdef _ARCH_64 - return Imm64(reinterpret_cast<u64>(imm)); -#else - return Imm32(reinterpret_cast<u32>(imm)); -#endif -} - -inline u32 PtrOffset(const void* ptr, const void* base) { -#ifdef _ARCH_64 - s64 distance = (s64)ptr - (s64)base; - if (distance >= 0x80000000LL || distance < -0x80000000LL) { - ASSERT_MSG(0, "pointer offset out of range"); - return 0; - } - - return (u32)distance; -#else - return (u32)ptr - (u32)base; -#endif -} - -// usage: int a[]; ARRAY_OFFSET(a,10) -#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0])) -// usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) - -struct FixupBranch { - u8* ptr; - int type; // 0 = 8bit 1 = 32bit -}; - -enum SSECompare { - EQ = 0, - LT, - LE, - UNORD, - NEQ, - NLT, - NLE, - ORD, -}; - -class XEmitter { - friend struct OpArg; // for Write8 etc -private: - u8* code; - bool flags_locked; - - void CheckFlags(); - - void Rex(int w, int r, int x, int b); - void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); - void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); - void WriteMulDivType(int bits, OpArg src, int ext); - void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); - void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); - void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); - void WriteMXCSR(OpArg arg, int ext); - void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); - void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, 
const OpArg& arg, - int extrabytes = 0); - void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, - int extrabytes = 0); - void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, - int extrabytes = 0); - void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, - int extrabytes = 0); - void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); - void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); - - void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, - size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); - -protected: - void Write8(u8 value); - void Write16(u16 value); - void Write32(u32 value); - void Write64(u64 value); - -public: - XEmitter() { - code = nullptr; - flags_locked = false; - } - XEmitter(u8* code_ptr) { - code = code_ptr; - flags_locked = false; - } - virtual ~XEmitter() {} - - void WriteModRM(int mod, int rm, int reg); - void WriteSIB(int scale, int index, int base); - - void SetCodePtr(u8* ptr); - void ReserveCodeSpace(int bytes); - const u8* AlignCode4(); - const u8* AlignCode16(); - const u8* AlignCodePage(); - const u8* GetCodePtr() const; - u8* GetWritableCodePtr(); - - void LockFlags() { - flags_locked = true; - } - void UnlockFlags() { - flags_locked = false; - } - - // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU - // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other - // string instr., - // INC and DEC are slow on Intel Core, but not on AMD. They create a - // false flag dependency because they only update a subset of the flags. - // XCHG is SLOW and should be avoided. - - // Debug breakpoint - void INT3(); - - // Do nothing - void NOP(size_t count = 1); - - // Save energy in wait-loops on P4 only. Probably not too useful. 
- void PAUSE(); - - // Flag control - void STC(); - void CLC(); - void CMC(); - - // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and - // AMD! - void LAHF(); // 3 cycle vector path - void SAHF(); // direct path fast - - // Stack control - void PUSH(X64Reg reg); - void POP(X64Reg reg); - void PUSH(int bits, const OpArg& reg); - void POP(int bits, const OpArg& reg); - void PUSHF(); - void POPF(); - - // Flow control - void RET(); - void RET_FAST(); - void UD2(); - FixupBranch J(bool force5bytes = false); - - void JMP(const u8* addr, bool force5Bytes = false); - void JMPptr(const OpArg& arg); - void JMPself(); // infinite loop! -#ifdef CALL -#undef CALL -#endif - void CALL(const void* fnptr); - FixupBranch CALL(); - void CALLptr(OpArg arg); - - FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); - void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); - - void SetJumpTarget(const FixupBranch& branch); - void SetJumpTarget(const FixupBranch& branch, const u8* target); - - void SETcc(CCFlags flag, OpArg dest); - // Note: CMOV brings small if any benefit on current cpus. 
- void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag); - - // Fences - void LFENCE(); - void MFENCE(); - void SFENCE(); - - // Bit scan - void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit - void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit - - // Cache control - enum PrefetchLevel { - PF_NTA, // Non-temporal (data used once and only once) - PF_T0, // All cache levels - PF_T1, // Levels 2+ (aliased to T0 on AMD) - PF_T2, // Levels 3+ (aliased to T0 on AMD) - }; - void PREFETCH(PrefetchLevel level, OpArg arg); - void MOVNTI(int bits, const OpArg& dest, X64Reg src); - void MOVNTDQ(const OpArg& arg, X64Reg regOp); - void MOVNTPS(const OpArg& arg, X64Reg regOp); - void MOVNTPD(const OpArg& arg, X64Reg regOp); - - // Multiplication / division - void MUL(int bits, const OpArg& src); // UNSIGNED - void IMUL(int bits, const OpArg& src); // SIGNED - void IMUL(int bits, X64Reg regOp, const OpArg& src); - void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); - void DIV(int bits, const OpArg& src); - void IDIV(int bits, const OpArg& src); - - // Shift - void ROL(int bits, const OpArg& dest, const OpArg& shift); - void ROR(int bits, const OpArg& dest, const OpArg& shift); - void RCL(int bits, const OpArg& dest, const OpArg& shift); - void RCR(int bits, const OpArg& dest, const OpArg& shift); - void SHL(int bits, const OpArg& dest, const OpArg& shift); - void SHR(int bits, const OpArg& dest, const OpArg& shift); - void SAR(int bits, const OpArg& dest, const OpArg& shift); - - // Bit Test - void BT(int bits, const OpArg& dest, const OpArg& index); - void BTS(int bits, const OpArg& dest, const OpArg& index); - void BTR(int bits, const OpArg& dest, const OpArg& index); - void BTC(int bits, const OpArg& dest, const OpArg& index); - - // Double-Precision Shift - void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); - void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& 
shift); - - // Extend EAX into EDX in various ways - void CWD(int bits = 16); - void CDQ() { - CWD(32); - } - void CQO() { - CWD(64); - } - void CBW(int bits = 8); - void CWDE() { - CBW(16); - } - void CDQE() { - CBW(32); - } - - // Load effective address - void LEA(int bits, X64Reg dest, OpArg src); - - // Integer arithmetic - void NEG(int bits, const OpArg& src); - void ADD(int bits, const OpArg& a1, const OpArg& a2); - void ADC(int bits, const OpArg& a1, const OpArg& a2); - void SUB(int bits, const OpArg& a1, const OpArg& a2); - void SBB(int bits, const OpArg& a1, const OpArg& a2); - void AND(int bits, const OpArg& a1, const OpArg& a2); - void CMP(int bits, const OpArg& a1, const OpArg& a2); - - // Bit operations - void NOT(int bits, const OpArg& src); - void OR(int bits, const OpArg& a1, const OpArg& a2); - void XOR(int bits, const OpArg& a1, const OpArg& a2); - void MOV(int bits, const OpArg& a1, const OpArg& a2); - void TEST(int bits, const OpArg& a1, const OpArg& a2); - - // Are these useful at all? Consider removing. - void XCHG(int bits, const OpArg& a1, const OpArg& a2); - void XCHG_AHAL(); - - // Byte swapping (32 and 64-bit only). - void BSWAP(int bits, X64Reg reg); - - // Sign/zero extension - void MOVSX(int dbits, int sbits, X64Reg dest, - OpArg src); // automatically uses MOVSXD if necessary - void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); - - // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe. - void MOVBE(int dbits, const OpArg& dest, const OpArg& src); - - // Available only on AMD >= Phenom or Intel >= Haswell - void LZCNT(int bits, X64Reg dest, const OpArg& src); - // Note: this one is actually part of BMI1 - void TZCNT(int bits, X64Reg dest, const OpArg& src); - - // WARNING - These two take 11-13 cycles and are VectorPath! 
(AMD64) - void STMXCSR(const OpArg& memloc); - void LDMXCSR(const OpArg& memloc); - - // Prefixes - void LOCK(); - void REP(); - void REPNE(); - void FSOverride(); - void GSOverride(); - - // x87 - enum x87StatusWordBits { - x87_InvalidOperation = 0x1, - x87_DenormalizedOperand = 0x2, - x87_DivisionByZero = 0x4, - x87_Overflow = 0x8, - x87_Underflow = 0x10, - x87_Precision = 0x20, - x87_StackFault = 0x40, - x87_ErrorSummary = 0x80, - x87_C0 = 0x100, - x87_C1 = 0x200, - x87_C2 = 0x400, - x87_TopOfStack = 0x2000 | 0x1000 | 0x800, - x87_C3 = 0x4000, - x87_FPUBusy = 0x8000, - }; - - void FLD(int bits, const OpArg& src); - void FST(int bits, const OpArg& dest); - void FSTP(int bits, const OpArg& dest); - void FNSTSW_AX(); - void FWAIT(); - - // SSE/SSE2: Floating point arithmetic - void ADDSS(X64Reg regOp, const OpArg& arg); - void ADDSD(X64Reg regOp, const OpArg& arg); - void SUBSS(X64Reg regOp, const OpArg& arg); - void SUBSD(X64Reg regOp, const OpArg& arg); - void MULSS(X64Reg regOp, const OpArg& arg); - void MULSD(X64Reg regOp, const OpArg& arg); - void DIVSS(X64Reg regOp, const OpArg& arg); - void DIVSD(X64Reg regOp, const OpArg& arg); - void MINSS(X64Reg regOp, const OpArg& arg); - void MINSD(X64Reg regOp, const OpArg& arg); - void MAXSS(X64Reg regOp, const OpArg& arg); - void MAXSD(X64Reg regOp, const OpArg& arg); - void SQRTSS(X64Reg regOp, const OpArg& arg); - void SQRTSD(X64Reg regOp, const OpArg& arg); - void RCPSS(X64Reg regOp, const OpArg& arg); - void RSQRTSS(X64Reg regOp, const OpArg& arg); - - // SSE/SSE2: Floating point bitwise (yes) - void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); - void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); - - void CMPEQSS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_EQ); - } - void CMPLTSS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_LT); - } - void CMPLESS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_LE); - } - void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { - 
CMPSS(regOp, arg, CMP_UNORD); - } - void CMPNEQSS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_NEQ); - } - void CMPNLTSS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_NLT); - } - void CMPORDSS(X64Reg regOp, const OpArg& arg) { - CMPSS(regOp, arg, CMP_ORD); - } - - // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) - void ADDPS(X64Reg regOp, const OpArg& arg); - void ADDPD(X64Reg regOp, const OpArg& arg); - void SUBPS(X64Reg regOp, const OpArg& arg); - void SUBPD(X64Reg regOp, const OpArg& arg); - void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); - void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); - void MULPS(X64Reg regOp, const OpArg& arg); - void MULPD(X64Reg regOp, const OpArg& arg); - void DIVPS(X64Reg regOp, const OpArg& arg); - void DIVPD(X64Reg regOp, const OpArg& arg); - void MINPS(X64Reg regOp, const OpArg& arg); - void MINPD(X64Reg regOp, const OpArg& arg); - void MAXPS(X64Reg regOp, const OpArg& arg); - void MAXPD(X64Reg regOp, const OpArg& arg); - void SQRTPS(X64Reg regOp, const OpArg& arg); - void SQRTPD(X64Reg regOp, const OpArg& arg); - void RCPPS(X64Reg regOp, const OpArg& arg); - void RSQRTPS(X64Reg regOp, const OpArg& arg); - - // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) - void ANDPS(X64Reg regOp, const OpArg& arg); - void ANDPD(X64Reg regOp, const OpArg& arg); - void ANDNPS(X64Reg regOp, const OpArg& arg); - void ANDNPD(X64Reg regOp, const OpArg& arg); - void ORPS(X64Reg regOp, const OpArg& arg); - void ORPD(X64Reg regOp, const OpArg& arg); - void XORPS(X64Reg regOp, const OpArg& arg); - void XORPD(X64Reg regOp, const OpArg& arg); - - // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. - void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); - void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); - - // SSE/SSE2: Useful alternative to shuffle in some cases. 
- void MOVDDUP(X64Reg regOp, const OpArg& arg); - - // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily - // on Ivy. - void HADDPS(X64Reg dest, const OpArg& src); - - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg - // contains both a read mask and a write "mask". - void DPPS(X64Reg dest, const OpArg& src, u8 arg); - - void UNPCKLPS(X64Reg dest, const OpArg& src); - void UNPCKHPS(X64Reg dest, const OpArg& src); - void UNPCKLPD(X64Reg dest, const OpArg& src); - void UNPCKHPD(X64Reg dest, const OpArg& src); - - // SSE/SSE2: Compares. - void COMISS(X64Reg regOp, const OpArg& arg); - void COMISD(X64Reg regOp, const OpArg& arg); - void UCOMISS(X64Reg regOp, const OpArg& arg); - void UCOMISD(X64Reg regOp, const OpArg& arg); - - // SSE/SSE2: Moves. Use the right data type for your data, in most cases. - void MOVAPS(X64Reg regOp, const OpArg& arg); - void MOVAPD(X64Reg regOp, const OpArg& arg); - void MOVAPS(const OpArg& arg, X64Reg regOp); - void MOVAPD(const OpArg& arg, X64Reg regOp); - - void MOVUPS(X64Reg regOp, const OpArg& arg); - void MOVUPD(X64Reg regOp, const OpArg& arg); - void MOVUPS(const OpArg& arg, X64Reg regOp); - void MOVUPD(const OpArg& arg, X64Reg regOp); - - void MOVDQA(X64Reg regOp, const OpArg& arg); - void MOVDQA(const OpArg& arg, X64Reg regOp); - void MOVDQU(X64Reg regOp, const OpArg& arg); - void MOVDQU(const OpArg& arg, X64Reg regOp); - - void MOVSS(X64Reg regOp, const OpArg& arg); - void MOVSD(X64Reg regOp, const OpArg& arg); - void MOVSS(const OpArg& arg, X64Reg regOp); - void MOVSD(const OpArg& arg, X64Reg regOp); - - void MOVLPS(X64Reg regOp, const OpArg& arg); - void MOVLPD(X64Reg regOp, const OpArg& arg); - void MOVLPS(const OpArg& arg, X64Reg regOp); - void MOVLPD(const OpArg& arg, X64Reg regOp); - - void MOVHPS(X64Reg regOp, const OpArg& arg); - void MOVHPD(X64Reg regOp, const OpArg& arg); - void MOVHPS(const OpArg& arg, X64Reg regOp); - void 
MOVHPD(const OpArg& arg, X64Reg regOp); - - void MOVHLPS(X64Reg regOp1, X64Reg regOp2); - void MOVLHPS(X64Reg regOp1, X64Reg regOp2); - - void MOVD_xmm(X64Reg dest, const OpArg& arg); - void MOVQ_xmm(X64Reg dest, OpArg arg); - void MOVD_xmm(const OpArg& arg, X64Reg src); - void MOVQ_xmm(OpArg arg, X64Reg src); - - // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in - // question. - void MOVMSKPS(X64Reg dest, const OpArg& arg); - void MOVMSKPD(X64Reg dest, const OpArg& arg); - - // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a - // weird one. - void MASKMOVDQU(X64Reg dest, X64Reg src); - void LDDQU(X64Reg dest, const OpArg& src); - - // SSE/SSE2: Data type conversions. - void CVTPS2PD(X64Reg dest, const OpArg& src); - void CVTPD2PS(X64Reg dest, const OpArg& src); - void CVTSS2SD(X64Reg dest, const OpArg& src); - void CVTSI2SS(X64Reg dest, const OpArg& src); - void CVTSD2SS(X64Reg dest, const OpArg& src); - void CVTSI2SD(X64Reg dest, const OpArg& src); - void CVTDQ2PD(X64Reg regOp, const OpArg& arg); - void CVTPD2DQ(X64Reg regOp, const OpArg& arg); - void CVTDQ2PS(X64Reg regOp, const OpArg& arg); - void CVTPS2DQ(X64Reg regOp, const OpArg& arg); - - void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); - void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); - - // Destinations are X64 regs (rax, rbx, ...) for these instructions. 
- void CVTSS2SI(X64Reg xregdest, const OpArg& src); - void CVTSD2SI(X64Reg xregdest, const OpArg& src); - void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); - void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); - - // SSE2: Packed integer instructions - void PACKSSDW(X64Reg dest, const OpArg& arg); - void PACKSSWB(X64Reg dest, const OpArg& arg); - void PACKUSDW(X64Reg dest, const OpArg& arg); - void PACKUSWB(X64Reg dest, const OpArg& arg); - - void PUNPCKLBW(X64Reg dest, const OpArg& arg); - void PUNPCKLWD(X64Reg dest, const OpArg& arg); - void PUNPCKLDQ(X64Reg dest, const OpArg& arg); - void PUNPCKLQDQ(X64Reg dest, const OpArg& arg); - - void PTEST(X64Reg dest, const OpArg& arg); - void PAND(X64Reg dest, const OpArg& arg); - void PANDN(X64Reg dest, const OpArg& arg); - void PXOR(X64Reg dest, const OpArg& arg); - void POR(X64Reg dest, const OpArg& arg); - - void PADDB(X64Reg dest, const OpArg& arg); - void PADDW(X64Reg dest, const OpArg& arg); - void PADDD(X64Reg dest, const OpArg& arg); - void PADDQ(X64Reg dest, const OpArg& arg); - - void PADDSB(X64Reg dest, const OpArg& arg); - void PADDSW(X64Reg dest, const OpArg& arg); - void PADDUSB(X64Reg dest, const OpArg& arg); - void PADDUSW(X64Reg dest, const OpArg& arg); - - void PSUBB(X64Reg dest, const OpArg& arg); - void PSUBW(X64Reg dest, const OpArg& arg); - void PSUBD(X64Reg dest, const OpArg& arg); - void PSUBQ(X64Reg dest, const OpArg& arg); - - void PSUBSB(X64Reg dest, const OpArg& arg); - void PSUBSW(X64Reg dest, const OpArg& arg); - void PSUBUSB(X64Reg dest, const OpArg& arg); - void PSUBUSW(X64Reg dest, const OpArg& arg); - - void PAVGB(X64Reg dest, const OpArg& arg); - void PAVGW(X64Reg dest, const OpArg& arg); - - void PCMPEQB(X64Reg dest, const OpArg& arg); - void PCMPEQW(X64Reg dest, const OpArg& arg); - void PCMPEQD(X64Reg dest, const OpArg& arg); - - void PCMPGTB(X64Reg dest, const OpArg& arg); - void PCMPGTW(X64Reg dest, const OpArg& arg); - void PCMPGTD(X64Reg dest, const OpArg& arg); - - void 
PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); - void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); - - void PMADDWD(X64Reg dest, const OpArg& arg); - void PSADBW(X64Reg dest, const OpArg& arg); - - void PMAXSW(X64Reg dest, const OpArg& arg); - void PMAXUB(X64Reg dest, const OpArg& arg); - void PMINSW(X64Reg dest, const OpArg& arg); - void PMINUB(X64Reg dest, const OpArg& arg); - // SSE4: More MAX/MIN instructions. - void PMINSB(X64Reg dest, const OpArg& arg); - void PMINSD(X64Reg dest, const OpArg& arg); - void PMINUW(X64Reg dest, const OpArg& arg); - void PMINUD(X64Reg dest, const OpArg& arg); - void PMAXSB(X64Reg dest, const OpArg& arg); - void PMAXSD(X64Reg dest, const OpArg& arg); - void PMAXUW(X64Reg dest, const OpArg& arg); - void PMAXUD(X64Reg dest, const OpArg& arg); - - void PMOVMSKB(X64Reg dest, const OpArg& arg); - void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); - void PSHUFB(X64Reg dest, const OpArg& arg); - - void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); - void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); - - void PSRLW(X64Reg reg, int shift); - void PSRLD(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, const OpArg& arg); - void PSRLDQ(X64Reg reg, int shift); - - void PSLLW(X64Reg reg, int shift); - void PSLLD(X64Reg reg, int shift); - void PSLLQ(X64Reg reg, int shift); - void PSLLDQ(X64Reg reg, int shift); - - void PSRAW(X64Reg reg, int shift); - void PSRAD(X64Reg reg, int shift); - - // SSE4: data type conversions - void PMOVSXBW(X64Reg dest, const OpArg& arg); - void PMOVSXBD(X64Reg dest, const OpArg& arg); - void PMOVSXBQ(X64Reg dest, const OpArg& arg); - void PMOVSXWD(X64Reg dest, const OpArg& arg); - void PMOVSXWQ(X64Reg dest, const OpArg& arg); - void PMOVSXDQ(X64Reg dest, const OpArg& arg); - void PMOVZXBW(X64Reg dest, const OpArg& arg); - void PMOVZXBD(X64Reg dest, const OpArg& arg); - void PMOVZXBQ(X64Reg dest, const OpArg& arg); - void PMOVZXWD(X64Reg dest, const OpArg& 
arg); - void PMOVZXWQ(X64Reg dest, const OpArg& arg); - void PMOVZXDQ(X64Reg dest, const OpArg& arg); - - // SSE4: variable blend instructions (xmm0 implicit argument) - void PBLENDVB(X64Reg dest, const OpArg& arg); - void BLENDVPS(X64Reg dest, const OpArg& arg); - void BLENDVPD(X64Reg dest, const OpArg& arg); - void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); - void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); - - // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) - void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); - void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); - void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); - void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); - - void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { - ROUNDSS(dest, arg, FROUND_NEAREST); - } - void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { - ROUNDSS(dest, arg, FROUND_FLOOR); - } - void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { - ROUNDSS(dest, arg, FROUND_CEIL); - } - void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { - ROUNDSS(dest, arg, FROUND_ZERO); - } - - void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { - ROUNDSD(dest, arg, FROUND_NEAREST); - } - void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { - ROUNDSD(dest, arg, FROUND_FLOOR); - } - void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { - ROUNDSD(dest, arg, FROUND_CEIL); - } - void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { - ROUNDSD(dest, arg, FROUND_ZERO); - } - - void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { - ROUNDPS(dest, arg, FROUND_NEAREST); - } - void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { - ROUNDPS(dest, arg, FROUND_FLOOR); - } - void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { - ROUNDPS(dest, arg, FROUND_CEIL); - } - void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { - ROUNDPS(dest, arg, FROUND_ZERO); - } - - void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { - ROUNDPD(dest, arg, FROUND_NEAREST); - } - void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { - 
ROUNDPD(dest, arg, FROUND_FLOOR); - } - void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { - ROUNDPD(dest, arg, FROUND_CEIL); - } - void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { - ROUNDPD(dest, arg, FROUND_ZERO); - } - - // AVX - void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); - void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - - void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - - void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - - // FMA3 - void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void 
VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD231SS(X64Reg 
regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - - // VEX GPR instructions - void SARX(int bits, X64Reg regOp1, const 
OpArg& arg, X64Reg regOp2); - void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); - void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); - void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); - void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); - void BLSR(int bits, X64Reg regOp, const OpArg& arg); - void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); - void BLSI(int bits, X64Reg regOp, const OpArg& arg); - void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); - void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); - - void RDTSC(); - - // Utility functions - // The difference between this and CALL is that this aligns the stack - // where appropriate. - void ABI_CallFunction(const void* func); - template <typename T> - void ABI_CallFunction(T (*func)()) { - ABI_CallFunction((const void*)func); - } - - void ABI_CallFunction(const u8* func) { - ABI_CallFunction((const void*)func); - } - void ABI_CallFunctionC16(const void* func, u16 param1); - void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); - - // These only support u32 parameters, but that's enough for a lot of uses. - // These will destroy the 1 or 2 first "parameter regs". 
- void ABI_CallFunctionC(const void* func, u32 param1); - void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); - void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); - void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); - void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); - void ABI_CallFunctionP(const void* func, void* param1); - void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); - void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); - void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); - void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); - void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); - void ABI_CallFunctionA(const void* func, const OpArg& arg1); - void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); - - // Pass a register as a parameter. 
- void ABI_CallFunctionR(const void* func, X64Reg reg1); - void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); - - template <typename Tr, typename T1> - void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { - ABI_CallFunctionC((const void*)func, param1); - } - - /** - * Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI - * - * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs) - * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8 - * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the - * stack - * @return Size of the shadow space, i.e., offset of the frame - */ - size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, - size_t needed_frame_size = 0); - - /** - * Restores specified registers and adjusts the stack to its original alignment, i.e., the - * alignment before - * the matching PushRegistersAndAdjustStack. - * - * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are - * GPRs) - * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must - * be 0 or 8 - * @param needed_frame_size Additional space that was needed - * @warning Stack must be currently 16-byte aligned - */ - void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, - size_t needed_frame_size = 0); - -#ifdef _M_IX86 - static int ABI_GetNumXMMRegs() { - return 8; - } -#else - static int ABI_GetNumXMMRegs() { - return 16; - } -#endif -}; // class XEmitter - -// Everything that needs to generate X86 code should inherit from this. -// You get memory management for free, plus, you can use all the MOV etc functions without -// having to prefix them with gen-> or something similar. 
- -class XCodeBlock : public CodeBlock<XEmitter> { -public: - void PoisonMemory() override; -}; - -} // namespace diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index faad0a561..fa7878c65 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -2,6 +2,7 @@ set(SRCS arm/disassembler/arm_disasm.cpp arm/disassembler/load_symbol_map.cpp arm/dynarmic/arm_dynarmic.cpp + arm/dynarmic/arm_dynarmic_cp15.cpp arm/dyncom/arm_dyncom.cpp arm/dyncom/arm_dyncom_dec.cpp arm/dyncom/arm_dyncom_interpreter.cpp @@ -29,6 +30,9 @@ set(SRCS file_sys/ivfc_archive.cpp file_sys/path_parser.cpp file_sys/savedata_archive.cpp + frontend/camera/blank_camera.cpp + frontend/camera/factory.cpp + frontend/camera/interface.cpp frontend/emu_window.cpp frontend/key_map.cpp frontend/motion_emu.cpp @@ -37,6 +41,7 @@ set(SRCS hle/applets/applet.cpp hle/applets/erreula.cpp hle/applets/mii_selector.cpp + hle/applets/mint.cpp hle/applets/swkbd.cpp hle/kernel/address_arbiter.cpp hle/kernel/client_port.cpp @@ -173,6 +178,7 @@ set(HEADERS arm/disassembler/arm_disasm.h arm/disassembler/load_symbol_map.h arm/dynarmic/arm_dynarmic.h + arm/dynarmic/arm_dynarmic_cp15.h arm/dyncom/arm_dyncom.h arm/dyncom/arm_dyncom_dec.h arm/dyncom/arm_dyncom_interpreter.h @@ -203,6 +209,9 @@ set(HEADERS file_sys/ivfc_archive.h file_sys/path_parser.h file_sys/savedata_archive.h + frontend/camera/blank_camera.h + frontend/camera/factory.h + frontend/camera/interface.h frontend/emu_window.h frontend/key_map.h frontend/motion_emu.h @@ -213,6 +222,7 @@ set(HEADERS hle/applets/applet.h hle/applets/erreula.h hle/applets/mii_selector.h + hle/applets/mint.h hle/applets/swkbd.h hle/kernel/address_arbiter.h hle/kernel/client_port.h diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 9f25e3b00..7d2790b08 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include 
"common/microprofile.h" #include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" #include "core/arm/dyncom/arm_dyncom_interpreter.h" #include "core/core.h" #include "core/core_timing.h" @@ -39,28 +40,30 @@ static bool IsReadOnlyMemory(u32 vaddr) { return false; } -static Dynarmic::UserCallbacks GetUserCallbacks(ARMul_State* interpeter_state) { +static Dynarmic::UserCallbacks GetUserCallbacks( + const std::shared_ptr<ARMul_State>& interpeter_state) { Dynarmic::UserCallbacks user_callbacks{}; user_callbacks.InterpreterFallback = &InterpreterFallback; - user_callbacks.user_arg = static_cast<void*>(interpeter_state); + user_callbacks.user_arg = static_cast<void*>(interpeter_state.get()); user_callbacks.CallSVC = &SVC::CallSVC; - user_callbacks.IsReadOnlyMemory = &IsReadOnlyMemory; - user_callbacks.MemoryReadCode = &Memory::Read32; - user_callbacks.MemoryRead8 = &Memory::Read8; - user_callbacks.MemoryRead16 = &Memory::Read16; - user_callbacks.MemoryRead32 = &Memory::Read32; - user_callbacks.MemoryRead64 = &Memory::Read64; - user_callbacks.MemoryWrite8 = &Memory::Write8; - user_callbacks.MemoryWrite16 = &Memory::Write16; - user_callbacks.MemoryWrite32 = &Memory::Write32; - user_callbacks.MemoryWrite64 = &Memory::Write64; + user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory; + user_callbacks.memory.ReadCode = &Memory::Read32; + user_callbacks.memory.Read8 = &Memory::Read8; + user_callbacks.memory.Read16 = &Memory::Read16; + user_callbacks.memory.Read32 = &Memory::Read32; + user_callbacks.memory.Read64 = &Memory::Read64; + user_callbacks.memory.Write8 = &Memory::Write8; + user_callbacks.memory.Write16 = &Memory::Write16; + user_callbacks.memory.Write32 = &Memory::Write32; + user_callbacks.memory.Write64 = &Memory::Write64; user_callbacks.page_table = Memory::GetCurrentPageTablePointers(); + user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state); return user_callbacks; } 
ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) { - interpreter_state = std::make_unique<ARMul_State>(initial_mode); - jit = std::make_unique<Dynarmic::Jit>(GetUserCallbacks(interpreter_state.get())); + interpreter_state = std::make_shared<ARMul_State>(initial_mode); + jit = std::make_unique<Dynarmic::Jit>(GetUserCallbacks(interpreter_state)); } void ARM_Dynarmic::SetPC(u32 pc) { diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 87ab53d81..834dc989e 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -39,5 +39,5 @@ public: private: std::unique_ptr<Dynarmic::Jit> jit; - std::unique_ptr<ARMul_State> interpreter_state; + std::shared_ptr<ARMul_State> interpreter_state; }; diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp new file mode 100644 index 000000000..b1fdce096 --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -0,0 +1,88 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/arm/skyeye_common/arm_regformat.h" +#include "core/arm/skyeye_common/armstate.h" + +using Callback = Dynarmic::Coprocessor::Callback; +using CallbackOrAccessOneWord = Dynarmic::Coprocessor::CallbackOrAccessOneWord; +using CallbackOrAccessTwoWords = Dynarmic::Coprocessor::CallbackOrAccessTwoWords; + +DynarmicCP15::DynarmicCP15(const std::shared_ptr<ARMul_State>& state) : interpreter_state(state) {} + +DynarmicCP15::~DynarmicCP15() = default; + +boost::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, + CoprocReg CRd, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + return boost::none; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { + // This is a dummy write, we ignore the value written here. + return &interpreter_state->CP15[CP15_FLUSH_PREFETCH_BUFFER]; + } + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { + switch (opc2) { + case 4: + // This is a dummy write, we ignore the value written here. + return &interpreter_state->CP15[CP15_DATA_SYNC_BARRIER]; + case 5: + // This is a dummy write, we ignore the value written here. 
+ return &interpreter_state->CP15[CP15_DATA_MEMORY_BARRIER]; + default: + return boost::blank{}; + } + } + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { + return &interpreter_state->CP15[CP15_THREAD_UPRW]; + } + + return boost::blank{}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return boost::blank{}; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { + switch (opc2) { + case 2: + return &interpreter_state->CP15[CP15_THREAD_UPRW]; + case 3: + return &interpreter_state->CP15[CP15_THREAD_URO]; + default: + return boost::blank{}; + } + } + + return boost::blank{}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return boost::blank{}; +} + +boost::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, + CoprocReg CRd, + boost::optional<u8> option) { + return boost::none; +} + +boost::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, + CoprocReg CRd, + boost::optional<u8> option) { + return boost::none; +} diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h new file mode 100644 index 000000000..7fa54e14c --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -0,0 +1,32 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <memory> +#include <dynarmic/coprocessor.h> +#include "common/common_types.h" + +struct ARMul_State; + +class DynarmicCP15 final : public Dynarmic::Coprocessor { +public: + explicit DynarmicCP15(const std::shared_ptr<ARMul_State>&); + ~DynarmicCP15() override; + + boost::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, + CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) override; + CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + boost::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, + boost::optional<u8> option) override; + boost::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, + boost::optional<u8> option) override; + +private: + std::shared_ptr<ARMul_State> interpreter_state; +}; diff --git a/src/core/frontend/camera/blank_camera.cpp b/src/core/frontend/camera/blank_camera.cpp new file mode 100644 index 000000000..7995abcbd --- /dev/null +++ b/src/core/frontend/camera/blank_camera.cpp @@ -0,0 +1,31 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/frontend/camera/blank_camera.h" + +namespace Camera { + +void BlankCamera::StartCapture() {} + +void BlankCamera::StopCapture() {} + +void BlankCamera::SetFormat(Service::CAM::OutputFormat output_format) { + output_rgb = output_format == Service::CAM::OutputFormat::RGB565; +} + +void BlankCamera::SetResolution(const Service::CAM::Resolution& resolution) { + width = resolution.width; + height = resolution.height; +}; + +void BlankCamera::SetFlip(Service::CAM::Flip) {} + +void BlankCamera::SetEffect(Service::CAM::Effect) {} + +std::vector<u16> BlankCamera::ReceiveFrame() const { + // Note: 0x80008000 stands for two black pixels in YUV422 + return std::vector<u16>(width * height, output_rgb ? 0 : 0x8000); +} + +} // namespace Camera diff --git a/src/core/frontend/camera/blank_camera.h b/src/core/frontend/camera/blank_camera.h new file mode 100644 index 000000000..c6619bd88 --- /dev/null +++ b/src/core/frontend/camera/blank_camera.h @@ -0,0 +1,28 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "core/frontend/camera/factory.h" +#include "core/frontend/camera/interface.h" + +namespace Camera { + +class BlankCamera final : public CameraInterface { +public: + void StartCapture() override; + void StopCapture() override; + void SetResolution(const Service::CAM::Resolution&) override; + void SetFlip(Service::CAM::Flip) override; + void SetEffect(Service::CAM::Effect) override; + void SetFormat(Service::CAM::OutputFormat) override; + std::vector<u16> ReceiveFrame() const override; + +private: + int width = 0; + int height = 0; + bool output_rgb = false; +}; + +} // namespace Camera diff --git a/src/core/frontend/camera/factory.cpp b/src/core/frontend/camera/factory.cpp new file mode 100644 index 000000000..4b4da50dd --- /dev/null +++ b/src/core/frontend/camera/factory.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <unordered_map> +#include "common/logging/log.h" +#include "core/frontend/camera/blank_camera.h" +#include "core/frontend/camera/factory.h" + +namespace Camera { + +static std::unordered_map<std::string, std::unique_ptr<CameraFactory>> factories; + +CameraFactory::~CameraFactory() = default; + +void RegisterFactory(const std::string& name, std::unique_ptr<CameraFactory> factory) { + factories[name] = std::move(factory); +} + +std::unique_ptr<CameraInterface> CreateCamera(const std::string& name, const std::string& config) { + auto pair = factories.find(name); + if (pair != factories.end()) { + return pair->second->Create(config); + } + + if (name != "blank") { + LOG_ERROR(Service_CAM, "Unknown camera \"%s\"", name.c_str()); + } + return std::make_unique<BlankCamera>(); +} + +} // namespace Camera diff --git a/src/core/frontend/camera/factory.h b/src/core/frontend/camera/factory.h new file mode 100644 index 000000000..d68be16e5 --- /dev/null +++ b/src/core/frontend/camera/factory.h @@ -0,0 +1,41 @@ +// 
Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <string> +#include "core/frontend/camera/interface.h" + +namespace Camera { + +class CameraFactory { +public: + virtual ~CameraFactory(); + + /** + * Creates a camera object based on the configuration string. + * @params config Configuration string to create the camera. The implementation can decide the + * meaning of this string. + * @returns a unique_ptr to the created camera object. + */ + virtual std::unique_ptr<CameraInterface> Create(const std::string& config) const = 0; +}; + +/** + * Registers an external camera factory. + * @param name Identifier of the camera factory. + * @param factory Camera factory to register. + */ +void RegisterFactory(const std::string& name, std::unique_ptr<CameraFactory> factory); + +/** + * Creates a camera from the factory. + * @param name Identifier of the camera factory. + * @param config Configuration string to create the camera. The meaning of this string is + * defined by the factory. + */ +std::unique_ptr<CameraInterface> CreateCamera(const std::string& name, const std::string& config); + +} // namespace Camera diff --git a/src/core/frontend/camera/interface.cpp b/src/core/frontend/camera/interface.cpp new file mode 100644 index 000000000..9aec9e7f1 --- /dev/null +++ b/src/core/frontend/camera/interface.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "core/frontend/camera/interface.h" + +namespace Camera { + +CameraInterface::~CameraInterface() = default; + +} // namespace Camera diff --git a/src/core/frontend/camera/interface.h b/src/core/frontend/camera/interface.h new file mode 100644 index 000000000..a55a495c9 --- /dev/null +++ b/src/core/frontend/camera/interface.h @@ -0,0 +1,61 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "core/hle/service/cam/cam.h" + +namespace Camera { + +/// An abstract class standing for a camera. All camera implementations should inherit from this. +class CameraInterface { +public: + virtual ~CameraInterface(); + + /// Starts the camera for video capturing. + virtual void StartCapture() = 0; + + /// Stops the camera for video capturing. + virtual void StopCapture() = 0; + + /** + * Sets the video resolution from raw CAM service parameters. + * For the meaning of the parameters, please refer to Service::CAM::Resolution. Note that the + * actual camera implementation doesn't need to respect all the parameters. However, the width + * and the height parameters must be respected and be used to determine the size of output + * frames. + * @param resolution The resolution parameters to set + */ + virtual void SetResolution(const Service::CAM::Resolution& resolution) = 0; + + /** + * Configures how received frames should be flipped by the camera. + * @param flip Flip applying to the frame + */ + virtual void SetFlip(Service::CAM::Flip flip) = 0; + + /** + * Configures what effect should be applied to received frames by the camera. + * @param effect Effect applying to the frame + */ + virtual void SetEffect(Service::CAM::Effect effect) = 0; + + /** + * Sets the output format of the all frames received after this function is called. 
+ * @param format Output format of the frame + */ + virtual void SetFormat(Service::CAM::OutputFormat format) = 0; + + /** + * Receives a frame from the camera. + * This function should be only called between a StartCapture call and a StopCapture call. + * @returns A std::vector<u16> containing pixels. The total size of the vector is width * height + * where width and height are set by a call to SetResolution. + */ + virtual std::vector<u16> ReceiveFrame() const = 0; +}; + +} // namespace Camera diff --git a/src/core/hle/applets/applet.cpp b/src/core/hle/applets/applet.cpp index 645b2d5fe..9c43ed2fd 100644 --- a/src/core/hle/applets/applet.cpp +++ b/src/core/hle/applets/applet.cpp @@ -12,6 +12,7 @@ #include "core/hle/applets/applet.h" #include "core/hle/applets/erreula.h" #include "core/hle/applets/mii_selector.h" +#include "core/hle/applets/mint.h" #include "core/hle/applets/swkbd.h" #include "core/hle/result.h" #include "core/hle/service/apt/apt.h" @@ -56,6 +57,10 @@ ResultCode Applet::Create(Service::APT::AppletId id) { case Service::APT::AppletId::Error2: applets[id] = std::make_shared<ErrEula>(id); break; + case Service::APT::AppletId::Mint: + case Service::APT::AppletId::Mint2: + applets[id] = std::make_shared<Mint>(id); + break; default: LOG_ERROR(Service_APT, "Could not create applet %u", id); // TODO(Subv): Find the right error code diff --git a/src/core/hle/applets/mint.cpp b/src/core/hle/applets/mint.cpp new file mode 100644 index 000000000..31a79ea17 --- /dev/null +++ b/src/core/hle/applets/mint.cpp @@ -0,0 +1,72 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/string_util.h" +#include "core/hle/applets/mint.h" +#include "core/hle/service/apt/apt.h" + +namespace HLE { +namespace Applets { + +ResultCode Mint::ReceiveParameter(const Service::APT::MessageParameter& parameter) { + if (parameter.signal != static_cast<u32>(Service::APT::SignalType::Request)) { + LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); + UNIMPLEMENTED(); + // TODO(Subv): Find the right error code + return ResultCode(-1); + } + + // The Request message contains a buffer with the size of the framebuffer shared + // memory. + // Create the SharedMemory that will hold the framebuffer data + Service::APT::CaptureBufferInfo capture_info; + ASSERT(sizeof(capture_info) == parameter.buffer.size()); + + memcpy(&capture_info, parameter.buffer.data(), sizeof(capture_info)); + + // TODO: allocated memory never released + using Kernel::MemoryPermission; + // Allocate a heap block of the required size for this applet. + heap_memory = std::make_shared<std::vector<u8>>(capture_info.size); + // Create a SharedMemory that directly points to this heap block. + framebuffer_memory = Kernel::SharedMemory::CreateForApplet( + heap_memory, 0, heap_memory->size(), MemoryPermission::ReadWrite, + MemoryPermission::ReadWrite, "Mint Memory"); + + // Send the response message with the newly created SharedMemory + Service::APT::MessageParameter result; + result.signal = static_cast<u32>(Service::APT::SignalType::Response); + result.buffer.clear(); + result.destination_id = static_cast<u32>(Service::APT::AppletId::Application); + result.sender_id = static_cast<u32>(id); + result.object = framebuffer_memory; + + Service::APT::SendParameter(result); + return RESULT_SUCCESS; +} + +ResultCode Mint::StartImpl(const Service::APT::AppletStartupParameter& parameter) { + is_running = true; + + // TODO(Subv): Set the expected fields in the response buffer before resending it to the + // application. 
+ // TODO(Subv): Reverse the parameter format for the Mint applet + + // Let the application know that we're closing + Service::APT::MessageParameter message; + message.buffer.resize(parameter.buffer.size()); + std::fill(message.buffer.begin(), message.buffer.end(), 0); + message.signal = static_cast<u32>(Service::APT::SignalType::WakeupByExit); + message.destination_id = static_cast<u32>(Service::APT::AppletId::Application); + message.sender_id = static_cast<u32>(id); + Service::APT::SendParameter(message); + + is_running = false; + return RESULT_SUCCESS; +} + +void Mint::Update() {} + +} // namespace Applets +} // namespace HLE diff --git a/src/core/hle/applets/mint.h b/src/core/hle/applets/mint.h new file mode 100644 index 000000000..d23dc40f9 --- /dev/null +++ b/src/core/hle/applets/mint.h @@ -0,0 +1,29 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/applets/applet.h" +#include "core/hle/kernel/shared_memory.h" + +namespace HLE { +namespace Applets { + +class Mint final : public Applet { +public: + explicit Mint(Service::APT::AppletId id) : Applet(id) {} + + ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; + ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; + void Update() override; + +private: + /// This SharedMemory will be created when we receive the Request message. 
+ /// It holds the framebuffer info retrieved by the application with + /// GSPGPU::ImportDisplayCaptureInfo + Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; +}; + +} // namespace Applets +} // namespace HLE diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp index ccd73cfcb..e386ccdc6 100644 --- a/src/core/hle/config_mem.cpp +++ b/src/core/hle/config_mem.cpp @@ -14,15 +14,18 @@ ConfigMemDef config_mem; void Init() { std::memset(&config_mem, 0, sizeof(config_mem)); - config_mem.update_flag = 0; // No update + // Values extracted from firmware 11.2.0-35E + config_mem.kernel_version_min = 0x34; + config_mem.kernel_version_maj = 0x2; + config_mem.ns_tid = 0x0004013000008002; config_mem.sys_core_ver = 0x2; config_mem.unit_info = 0x1; // Bit 0 set for Retail - config_mem.prev_firm = 0; - config_mem.firm_unk = 0; - config_mem.firm_version_rev = 0; - config_mem.firm_version_min = 0x40; + config_mem.prev_firm = 0x1; + config_mem.ctr_sdk_ver = 0x0000F297; + config_mem.firm_version_min = 0x34; config_mem.firm_version_maj = 0x2; config_mem.firm_sys_core_ver = 0x2; + config_mem.firm_ctr_sdk_ver = 0x0000F297; } } // namespace diff --git a/src/core/hle/service/cam/cam.cpp b/src/core/hle/service/cam/cam.cpp index 5594aedab..95665e754 100644 --- a/src/core/hle/service/cam/cam.cpp +++ b/src/core/hle/service/cam/cam.cpp @@ -2,7 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> +#include <array> +#include <future> +#include <memory> +#include <vector> +#include "common/bit_set.h" #include "common/logging/log.h" +#include "core/core_timing.h" +#include "core/frontend/camera/factory.h" #include "core/hle/kernel/event.h" #include "core/hle/service/cam/cam.h" #include "core/hle/service/cam/cam_c.h" @@ -10,206 +18,924 @@ #include "core/hle/service/cam/cam_s.h" #include "core/hle/service/cam/cam_u.h" #include "core/hle/service/service.h" +#include "core/settings.h" namespace Service { namespace CAM { -static const u32 TRANSFER_BYTES = 5 * 1024; +namespace { + +struct ContextConfig { + Flip flip; + Effect effect; + OutputFormat format; + Resolution resolution; +}; + +struct CameraConfig { + std::unique_ptr<Camera::CameraInterface> impl; + std::array<ContextConfig, 2> contexts; + int current_context; + FrameRate frame_rate; +}; + +struct PortConfig { + int camera_id; + + bool is_active; // set when the port is activated by an Activate call. + bool is_pending_receiving; // set if SetReceiving is called when is_busy = false. When + // StartCapture is called then, this will trigger a receiving + // process and reset itself. + bool is_busy; // set when StartCapture is called and reset when StopCapture is called. + bool is_receiving; // set when there is an ongoing receiving process. + + bool is_trimming; + u16 x0; // x-coordinate of starting position for trimming + u16 y0; // y-coordinate of starting position for trimming + u16 x1; // x-coordinate of ending position for trimming + u16 y1; // y-coordinate of ending position for trimming + + u32 transfer_bytes; + + Kernel::SharedPtr<Kernel::Event> completion_event; + Kernel::SharedPtr<Kernel::Event> buffer_error_interrupt_event; + Kernel::SharedPtr<Kernel::Event> vsync_interrupt_event; + + std::future<std::vector<u16>> capture_result; // will hold the received frame. 
+ VAddr dest; // the destination address of a receiving process + u32 dest_size; // the destination size of a receiving process + + void Clear() { + completion_event->Clear(); + buffer_error_interrupt_event->Clear(); + vsync_interrupt_event->Clear(); + is_receiving = false; + is_active = false; + is_pending_receiving = false; + is_busy = false; + is_trimming = false; + x0 = 0; + y0 = 0; + x1 = 0; + y1 = 0; + transfer_bytes = 256; + } +}; + +// built-in resolution parameters +constexpr std::array<Resolution, 8> PRESET_RESOLUTION{{ + {640, 480, 0, 0, 639, 479}, // VGA + {320, 240, 0, 0, 639, 479}, // QVGA + {160, 120, 0, 0, 639, 479}, // QQVGA + {352, 288, 26, 0, 613, 479}, // CIF + {176, 144, 26, 0, 613, 479}, // QCIF + {256, 192, 0, 0, 639, 479}, // DS_LCD + {512, 384, 0, 0, 639, 479}, // DS_LCDx4 + {400, 240, 0, 48, 639, 431}, // CTR_TOP_LCD +}}; + +// latency in ms for each frame rate option +constexpr std::array<int, 13> LATENCY_BY_FRAME_RATE{{ + 67, // Rate_15 + 67, // Rate_15_To_5 + 67, // Rate_15_To_2 + 100, // Rate_10 + 118, // Rate_8_5 + 200, // Rate_5 + 50, // Rate_20 + 50, // Rate_20_To_5 + 33, // Rate_30 + 33, // Rate_30_To_5 + 67, // Rate_15_To_10 + 50, // Rate_20_To_10 + 33, // Rate_30_To_10 +}}; + +std::array<CameraConfig, NumCameras> cameras; +std::array<PortConfig, 2> ports; +int completion_event_callback; + +const ResultCode ERROR_INVALID_ENUM_VALUE(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, + ErrorSummary::InvalidArgument, ErrorLevel::Usage); +const ResultCode ERROR_OUT_OF_RANGE(ErrorDescription::OutOfRange, ErrorModule::CAM, + ErrorSummary::InvalidArgument, ErrorLevel::Usage); + +void CompletionEventCallBack(u64 port_id, int) { + PortConfig& port = ports[port_id]; + const CameraConfig& camera = cameras[port.camera_id]; + const auto buffer = port.capture_result.get(); + + if (port.is_trimming) { + u32 trim_width; + u32 trim_height; + const int original_width = camera.contexts[camera.current_context].resolution.width; + const int 
original_height = camera.contexts[camera.current_context].resolution.height; + if (port.x1 <= port.x0 || port.y1 <= port.y0 || port.x1 > original_width || + port.y1 > original_height) { + LOG_ERROR(Service_CAM, "Invalid trimming coordinates x0=%u, y0=%u, x1=%u, y1=%u", + port.x0, port.y0, port.x1, port.y1); + trim_width = 0; + trim_height = 0; + } else { + trim_width = port.x1 - port.x0; + trim_height = port.y1 - port.y0; + } + + u32 trim_size = (port.x1 - port.x0) * (port.y1 - port.y0) * 2; + if (port.dest_size != trim_size) { + LOG_ERROR(Service_CAM, "The destination size (%u) doesn't match the source (%u)!", + port.dest_size, trim_size); + } + + const u32 src_offset = port.y0 * original_width + port.x0; + const u16* src_ptr = buffer.data() + src_offset; + // Note: src_size_left is int because it can be negative if the buffer size doesn't match. + int src_size_left = static_cast<int>((buffer.size() - src_offset) * sizeof(u16)); + VAddr dest_ptr = port.dest; + // Note: dest_size_left and line_bytes are int to match the type of src_size_left. + int dest_size_left = static_cast<int>(port.dest_size); + const int line_bytes = static_cast<int>(trim_width * sizeof(u16)); + + for (u32 y = 0; y < trim_height; ++y) { + int copy_length = std::min({line_bytes, dest_size_left, src_size_left}); + if (copy_length <= 0) { + break; + } + Memory::WriteBlock(dest_ptr, src_ptr, copy_length); + dest_ptr += copy_length; + dest_size_left -= copy_length; + src_ptr += original_width; + src_size_left -= original_width * sizeof(u16); + } + } else { + std::size_t buffer_size = buffer.size() * sizeof(u16); + if (port.dest_size != buffer_size) { + LOG_ERROR(Service_CAM, "The destination size (%u) doesn't match the source (%zu)!", + port.dest_size, buffer_size); + } + Memory::WriteBlock(port.dest, buffer.data(), std::min<u32>(port.dest_size, buffer_size)); + } + + port.is_receiving = false; + port.completion_event->Signal(); +} + +// Starts a receiving process on the specified port. 
This can only be called when is_busy = true and +// is_receiving = false. +void StartReceiving(int port_id) { + PortConfig& port = ports[port_id]; + port.is_receiving = true; + + // launches a capture task asynchronously + const CameraConfig& camera = cameras[port.camera_id]; + port.capture_result = + std::async(std::launch::async, &Camera::CameraInterface::ReceiveFrame, camera.impl.get()); + + // schedules a completion event according to the frame rate. The event will block on the + // capture task if it is not finished within the expected time + CoreTiming::ScheduleEvent( + msToCycles(LATENCY_BY_FRAME_RATE[static_cast<int>(camera.frame_rate)]), + completion_event_callback, port_id); +} + +// Cancels any ongoing receiving processes at the specified port. This is used by functions that +// stop capturing. +// TODO: what is the exact behaviour on real 3DS when stopping capture during an ongoing process? +// Will the completion event still be signaled? +void CancelReceiving(int port_id) { + if (!ports[port_id].is_receiving) + return; + LOG_WARNING(Service_CAM, "tries to cancel an ongoing receiving process."); + CoreTiming::UnscheduleEvent(completion_event_callback, port_id); + ports[port_id].capture_result.wait(); + ports[port_id].is_receiving = false; +} + +// Activates the specified port with the specified camera. 
+static void ActivatePort(int port_id, int camera_id) { + if (ports[port_id].is_busy && ports[port_id].camera_id != camera_id) { + CancelReceiving(port_id); + cameras[ports[port_id].camera_id].impl->StopCapture(); + ports[port_id].is_busy = false; + } + ports[port_id].is_active = true; + ports[port_id].camera_id = camera_id; +} + +template <int max_index> +class CommandParamBitSet : public BitSet8 { +public: + explicit CommandParamBitSet(u32 command_param) + : BitSet8(static_cast<u8>(command_param & 0xFF)) {} -static Kernel::SharedPtr<Kernel::Event> completion_event_cam1; -static Kernel::SharedPtr<Kernel::Event> completion_event_cam2; -static Kernel::SharedPtr<Kernel::Event> interrupt_error_event; -static Kernel::SharedPtr<Kernel::Event> vsync_interrupt_error_event; + bool IsValid() const { + return m_val < (1 << max_index); + } + + bool IsSingle() const { + return IsValid() && Count() == 1; + } +}; + +using PortSet = CommandParamBitSet<2>; +using ContextSet = CommandParamBitSet<2>; +using CameraSet = CommandParamBitSet<3>; + +} // namespace void StartCapture(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsValid()) { + for (int i : port_select) { + if (!ports[i].is_busy) { + if (!ports[i].is_active) { + // This doesn't return an error, but seems to put the camera in an undefined + // state + LOG_ERROR(Service_CAM, "port %u hasn't been activated", i); + } else { + cameras[ports[i].camera_id].impl->StartCapture(); + ports[i].is_busy = true; + if (ports[i].is_pending_receiving) { + ports[i].is_pending_receiving = false; + StartReceiving(i); + } + } + } else { + LOG_WARNING(Service_CAM, "port %u already started", i); + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0); - cmd_buff[1] = 
RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); } void StopCapture(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsValid()) { + for (int i : port_select) { + if (ports[i].is_busy) { + CancelReceiving(i); + cameras[ports[i].camera_id].impl->StopCapture(); + ports[i].is_busy = false; + } else { + LOG_WARNING(Service_CAM, "port %u already stopped", i); + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); +} + +void IsBusy(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsValid()) { + bool is_busy = true; + // Note: the behaviour on no or both ports selected are verified against real 3DS. + for (int i : port_select) { + is_busy &= ports[i].is_busy; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = is_busy ? 
1 : 0; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x3, 2, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); +} + +void ClearBuffer(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + + cmd_buff[0] = IPC::MakeHeader(0x4, 1, 0); cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); + LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val); } void GetVsyncInterruptEvent(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = IPC::CopyHandleDesc(); + cmd_buff[3] = Kernel::g_handle_table.Create(ports[port].vsync_interrupt_event).MoveFrom(); + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + cmd_buff[2] = IPC::CopyHandleDesc(); + cmd_buff[3] = 0; // null handle: keep the descriptor in [2] and clear the handle slot + } cmd_buff[0] = IPC::MakeHeader(0x5, 1, 2); - cmd_buff[1] = RESULT_SUCCESS.raw; - cmd_buff[2] = IPC::CopyHandleDesc(); - cmd_buff[3] = Kernel::g_handle_table.Create(vsync_interrupt_error_event).MoveFrom(); - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); + LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val); } void GetBufferErrorInterruptEvent(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; - - cmd_buff[0] = IPC::MakeHeader(0x6, 1, 2); - cmd_buff[1] = RESULT_SUCCESS.raw; - cmd_buff[2] = IPC::CopyHandleDesc(); - cmd_buff[3] = Kernel::g_handle_table.Create(interrupt_error_event).MoveFrom(); - - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); + const 
PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = IPC::CopyHandleDesc(); + cmd_buff[3] = + Kernel::g_handle_table.Create(ports[port].buffer_error_interrupt_event).MoveFrom(); + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + cmd_buff[2] = IPC::CopyHandleDesc(); + cmd_buff[3] = 0; // null handle: keep the descriptor in [2] and clear the handle slot + } + + // The response header was dropped when the stub was replaced; write it like every other command + cmd_buff[0] = IPC::MakeHeader(0x6, 1, 2); + + LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val); } void SetReceiving(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - VAddr dest = cmd_buff[1]; - u8 port = cmd_buff[2] & 0xFF; - u32 image_size = cmd_buff[3]; - u16 trans_unit = cmd_buff[4] & 0xFFFF; + const VAddr dest = cmd_buff[1]; + const PortSet port_select(cmd_buff[2]); + const u32 image_size = cmd_buff[3]; + const u32 trans_unit = cmd_buff[4] & 0xFFFF; + + if (port_select.IsSingle()) { + int port_id = *port_select.begin(); + PortConfig& port = ports[port_id]; + CancelReceiving(port_id); + port.completion_event->Clear(); + port.dest = dest; + port.dest_size = image_size; + + if (port.is_busy) { + StartReceiving(port_id); + } else { + port.is_pending_receiving = true; + } + + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = IPC::CopyHandleDesc(); + cmd_buff[3] = Kernel::g_handle_table.Create(port.completion_event).MoveFrom(); + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x7, 1, 2); - Kernel::Event* completion_event = - (Port)port == Port::Cam2 ? 
completion_event_cam2.get() : completion_event_cam1.get(); + LOG_DEBUG(Service_CAM, "called, addr=0x%X, port_select=%u, image_size=%u, trans_unit=%u", dest, + port_select.m_val, image_size, trans_unit); +} - completion_event->Signal(); +void IsFinishedReceiving(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); - cmd_buff[0] = IPC::MakeHeader(0x7, 1, 2); - cmd_buff[1] = RESULT_SUCCESS.raw; - cmd_buff[2] = IPC::CopyHandleDesc(); - cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = (ports[port].is_receiving || ports[port].is_pending_receiving) ? 0 : 1; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } - LOG_WARNING(Service_CAM, "(STUBBED) called, addr=0x%X, port=%d, image_size=%d, trans_unit=%d", - dest, port, image_size, trans_unit); + cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); } void SetTransferLines(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; - u16 transfer_lines = cmd_buff[2] & 0xFFFF; - u16 width = cmd_buff[3] & 0xFFFF; - u16 height = cmd_buff[4] & 0xFFFF; + const PortSet port_select(cmd_buff[1]); + const u32 transfer_lines = cmd_buff[2] & 0xFFFF; + const u32 width = cmd_buff[3] & 0xFFFF; + const u32 height = cmd_buff[4] & 0xFFFF; + + if (port_select.IsValid()) { + for (int i : port_select) { + ports[i].transfer_bytes = transfer_lines * width * 2; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, 
lines=%d, width=%d, height=%d", port, - transfer_lines, width, height); + LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u, lines=%u, width=%u, height=%u", + port_select.m_val, transfer_lines, width, height); } void GetMaxLines(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u16 width = cmd_buff[1] & 0xFFFF; - u16 height = cmd_buff[2] & 0xFFFF; + const u32 width = cmd_buff[1] & 0xFFFF; + const u32 height = cmd_buff[2] & 0xFFFF; + + // Note: the result of the algorithm below are hwtested with width < 640 and with height < 480 + constexpr u32 MIN_TRANSFER_UNIT = 256; + constexpr u32 MAX_BUFFER_SIZE = 2560; + // width and height come from the guest: a zero width divides by zero below, and a zero height + // or a width above MAX_BUFFER_SIZE makes lines start at 0 so that height % lines divides by + // zero. Reject those inputs up front instead of crashing the host. + if (width == 0 || height == 0 || width > MAX_BUFFER_SIZE || + width * height * 2 % MIN_TRANSFER_UNIT != 0) { + cmd_buff[1] = ERROR_OUT_OF_RANGE.raw; + } else { + u32 lines = MAX_BUFFER_SIZE / width; + if (lines > height) { + lines = height; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + while (height % lines != 0 || (lines * width * 2 % MIN_TRANSFER_UNIT != 0)) { + --lines; + if (lines == 0) { + cmd_buff[1] = ERROR_OUT_OF_RANGE.raw; + break; + } + } + cmd_buff[2] = lines; + } cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - cmd_buff[2] = TRANSFER_BYTES / (2 * width); - LOG_WARNING(Service_CAM, "(STUBBED) called, width=%d, height=%d, lines = %d", width, height, - cmd_buff[2]); + LOG_DEBUG(Service_CAM, "called, width=%u, height=%u", width, height); +} + +void SetTransferBytes(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + const u32 transfer_bytes = cmd_buff[2] & 0xFFFF; + const u32 width = cmd_buff[3] & 0xFFFF; + const u32 height = cmd_buff[4] & 0xFFFF; + + if (port_select.IsValid()) { + for (int i : port_select) { + ports[i].transfer_bytes = transfer_bytes; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0); + + 
LOG_WARNING(Service_CAM, "(STUBBED)called, port_select=%u, bytes=%u, width=%u, height=%u", + port_select.m_val, transfer_bytes, width, height); } void GetTransferBytes(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = ports[port].transfer_bytes; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - cmd_buff[2] = TRANSFER_BYTES; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); + LOG_WARNING(Service_CAM, "(STUBBED)called, port_select=%u", port_select.m_val); +} + +void GetMaxBytes(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const u32 width = cmd_buff[1] & 0xFFFF; + const u32 height = cmd_buff[2] & 0xFFFF; + + // Note: the result of the algorithm below are hwtested with width < 640 and with height < 480 + constexpr u32 MIN_TRANSFER_UNIT = 256; + constexpr u32 MAX_BUFFER_SIZE = 2560; + if (width * height * 2 % MIN_TRANSFER_UNIT != 0) { + cmd_buff[1] = ERROR_OUT_OF_RANGE.raw; + } else { + u32 bytes = MAX_BUFFER_SIZE; + + while (width * height * 2 % bytes != 0) { + bytes -= MIN_TRANSFER_UNIT; + } + + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = bytes; + } + cmd_buff[0] = IPC::MakeHeader(0xD, 2, 0); + + LOG_DEBUG(Service_CAM, "called, width=%u, height=%u", width, height); } void SetTrimming(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; - bool trim = (cmd_buff[2] & 0xFF) != 0; + const PortSet port_select(cmd_buff[1]); + const bool trim = (cmd_buff[2] & 0xFF) != 0; + + if (port_select.IsValid()) { + for (int i : port_select) { + ports[i].is_trimming = trim; + } + cmd_buff[1] = RESULT_SUCCESS.raw; 
+ } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0xE, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, trim=%d", port, trim); + LOG_DEBUG(Service_CAM, "called, port_select=%u, trim=%d", port_select.m_val, trim); +} + +void IsTrimming(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = RESULT_SUCCESS.raw; + cmd_buff[2] = ports[port].is_trimming; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); +} + +void SetTrimmingParams(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + const u16 x0 = static_cast<u16>(cmd_buff[2] & 0xFFFF); + const u16 y0 = static_cast<u16>(cmd_buff[3] & 0xFFFF); + const u16 x1 = static_cast<u16>(cmd_buff[4] & 0xFFFF); + const u16 y1 = static_cast<u16>(cmd_buff[5] & 0xFFFF); + + if (port_select.IsValid()) { + for (int i : port_select) { + ports[i].x0 = x0; + ports[i].y0 = y0; + ports[i].x1 = x1; + ports[i].y1 = y1; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u, x0=%u, y0=%u, x1=%u, y1=%u", port_select.m_val, + x0, y0, x1, y1); +} + +void GetTrimmingParams(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const PortSet port_select(cmd_buff[1]); + + if (port_select.IsSingle()) { + int port = *port_select.begin(); + cmd_buff[1] = 
RESULT_SUCCESS.raw; + cmd_buff[2] = ports[port].x0; + cmd_buff[3] = ports[port].y0; + cmd_buff[4] = ports[port].x1; + cmd_buff[5] = ports[port].y1; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x11, 5, 0); + + LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val); } void SetTrimmingParamsCenter(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 port = cmd_buff[1] & 0xFF; - s16 trimW = cmd_buff[2] & 0xFFFF; - s16 trimH = cmd_buff[3] & 0xFFFF; - s16 camW = cmd_buff[4] & 0xFFFF; - s16 camH = cmd_buff[5] & 0xFFFF; + const PortSet port_select(cmd_buff[1]); + const u16 trim_w = static_cast<u16>(cmd_buff[2] & 0xFFFF); + const u16 trim_h = static_cast<u16>(cmd_buff[3] & 0xFFFF); + const u16 cam_w = static_cast<u16>(cmd_buff[4] & 0xFFFF); + const u16 cam_h = static_cast<u16>(cmd_buff[5] & 0xFFFF); + + if (port_select.IsValid()) { + for (int i : port_select) { + ports[i].x0 = (cam_w - trim_w) / 2; + ports[i].y0 = (cam_h - trim_h) / 2; + ports[i].x1 = ports[i].x0 + trim_w; + ports[i].y1 = ports[i].y0 + trim_h; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, trimW=%d, trimH=%d, camW=%d, camH=%d", - port, trimW, trimH, camW, camH); + LOG_DEBUG(Service_CAM, "called, port_select=%u, trim_w=%u, trim_h=%u, cam_w=%u, cam_h=%u", + port_select.m_val, trim_w, trim_h, cam_w, cam_h); } void Activate(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 cam_select = cmd_buff[1] & 0xFF; + const CameraSet camera_select(cmd_buff[1]); + + if (camera_select.IsValid()) { + if (camera_select.m_val == 0) { // deactivate all + for (int i = 0; i < 2; ++i) { + if 
(ports[i].is_busy) { + CancelReceiving(i); + cameras[ports[i].camera_id].impl->StopCapture(); + ports[i].is_busy = false; + } + ports[i].is_active = false; + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else if (camera_select[0] && camera_select[1]) { + LOG_ERROR(Service_CAM, "camera 0 and 1 can't be both activated"); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } else { + if (camera_select[0]) { + ActivatePort(0, 0); + } else if (camera_select[1]) { + ActivatePort(0, 1); + } + + if (camera_select[2]) { + ActivatePort(1, 2); + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u", camera_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d", cam_select); + LOG_DEBUG(Service_CAM, "called, camera_select=%u", camera_select.m_val); +} + +void SwitchContext(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const CameraSet camera_select(cmd_buff[1]); + const ContextSet context_select(cmd_buff[2]); + + if (camera_select.IsValid() && context_select.IsSingle()) { + int context = *context_select.begin(); + for (int camera : camera_select) { + cameras[camera].current_context = context; + const ContextConfig& context_config = cameras[camera].contexts[context]; + cameras[camera].impl->SetFlip(context_config.flip); + cameras[camera].impl->SetEffect(context_config.effect); + cameras[camera].impl->SetFormat(context_config.format); + cameras[camera].impl->SetResolution(context_config.resolution); + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x14, 1, 0); + + LOG_DEBUG(Service_CAM, "called, camera_select=%u, context_select=%u", camera_select.m_val, + 
context_select.m_val); } void FlipImage(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 cam_select = cmd_buff[1] & 0xFF; - u8 flip = cmd_buff[2] & 0xFF; - u8 context = cmd_buff[3] & 0xFF; + const CameraSet camera_select(cmd_buff[1]); + const Flip flip = static_cast<Flip>(cmd_buff[2] & 0xFF); + const ContextSet context_select(cmd_buff[3]); + + if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera : camera_select) { + for (int context : context_select) { + cameras[camera].contexts[context].flip = flip; + if (cameras[camera].current_context == context) { + cameras[camera].impl->SetFlip(flip); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x1D, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, flip=%d, context=%d", cam_select, - flip, context); + LOG_DEBUG(Service_CAM, "called, camera_select=%u, flip=%d, context_select=%u", + camera_select.m_val, static_cast<int>(flip), context_select.m_val); +} + +void SetDetailSize(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const CameraSet camera_select(cmd_buff[1]); + Resolution resolution; + resolution.width = static_cast<u16>(cmd_buff[2] & 0xFFFF); + resolution.height = static_cast<u16>(cmd_buff[3] & 0xFFFF); + resolution.crop_x0 = static_cast<u16>(cmd_buff[4] & 0xFFFF); + resolution.crop_y0 = static_cast<u16>(cmd_buff[5] & 0xFFFF); + resolution.crop_x1 = static_cast<u16>(cmd_buff[6] & 0xFFFF); + resolution.crop_y1 = static_cast<u16>(cmd_buff[7] & 0xFFFF); + const ContextSet context_select(cmd_buff[8]); + + if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera : camera_select) { + for (int context : context_select) { + cameras[camera].contexts[context].resolution 
= resolution; + if (cameras[camera].current_context == context) { + cameras[camera].impl->SetResolution(resolution); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0); + + LOG_DEBUG(Service_CAM, "called, camera_select=%u, width=%u, height=%u, crop_x0=%u, crop_y0=%u, " + "crop_x1=%u, crop_y1=%u, context_select=%u", + camera_select.m_val, resolution.width, resolution.height, resolution.crop_x0, + resolution.crop_y0, resolution.crop_x1, resolution.crop_y1, context_select.m_val); } void SetSize(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u8 cam_select = cmd_buff[1] & 0xFF; - u8 size = cmd_buff[2] & 0xFF; - u8 context = cmd_buff[3] & 0xFF; + const CameraSet camera_select(cmd_buff[1]); + const u32 size = cmd_buff[2] & 0xFF; + const ContextSet context_select(cmd_buff[3]); + + if (size >= PRESET_RESOLUTION.size()) { + // size is a raw guest byte (0..255) but PRESET_RESOLUTION only has 8 entries; reject it + // before it is used as an array index + LOG_ERROR(Service_CAM, "invalid size=%u", size); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } else if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera : camera_select) { + for (int context : context_select) { + cameras[camera].contexts[context].resolution = PRESET_RESOLUTION[size]; + if (cameras[camera].current_context == context) { + cameras[camera].impl->SetResolution(PRESET_RESOLUTION[size]); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x1F, 1, 0); - cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, size=%d, context=%d", cam_select, - size, context); + LOG_DEBUG(Service_CAM, "called, camera_select=%u, size=%u, context_select=%u", + camera_select.m_val, size, context_select.m_val); } void SetFrameRate(Service::Interface* self) { u32* cmd_buff = 
Kernel::GetCommandBuffer(); - u8 cam_select = cmd_buff[1] & 0xFF; - u8 frame_rate = cmd_buff[2] & 0xFF; + const CameraSet camera_select(cmd_buff[1]); + const FrameRate frame_rate = static_cast<FrameRate>(cmd_buff[2] & 0xFF); + + if (static_cast<std::size_t>(frame_rate) >= LATENCY_BY_FRAME_RATE.size()) { + // frame_rate is later used by StartReceiving as an index into LATENCY_BY_FRAME_RATE, so a + // value the table does not cover must never be stored + LOG_ERROR(Service_CAM, "invalid frame_rate=%d", static_cast<int>(frame_rate)); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } else if (camera_select.IsValid()) { + for (int camera : camera_select) { + cameras[camera].frame_rate = frame_rate; + // TODO(wwylele): consider hinting the actual camera with the expected frame rate + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u", camera_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0); + + LOG_WARNING(Service_CAM, "(STUBBED) called, camera_select=%u, frame_rate=%d", + camera_select.m_val, static_cast<int>(frame_rate)); +} + +void SetEffect(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const CameraSet camera_select(cmd_buff[1]); + const Effect effect = static_cast<Effect>(cmd_buff[2] & 0xFF); + const ContextSet context_select(cmd_buff[3]); + + if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera : camera_select) { + for (int context : context_select) { + cameras[camera].contexts[context].effect = effect; + if (cameras[camera].current_context == context) { + cameras[camera].impl->SetEffect(effect); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0); + + LOG_DEBUG(Service_CAM, "called, camera_select=%u, effect=%d, context_select=%u", + camera_select.m_val, static_cast<int>(effect), context_select.m_val); +} + +void SetOutputFormat(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const CameraSet camera_select(cmd_buff[1]); + const OutputFormat format = static_cast<OutputFormat>(cmd_buff[2] & 0xFF); + const ContextSet 
context_select(cmd_buff[3]); + + if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera : camera_select) { + for (int context : context_select) { + cameras[camera].contexts[context].format = format; + if (cameras[camera].current_context == context) { + cameras[camera].impl->SetFormat(format); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val, + context_select.m_val); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(0x25, 1, 0); + + LOG_DEBUG(Service_CAM, "called, camera_select=%u, format=%d, context_select=%u", + camera_select.m_val, static_cast<int>(format), context_select.m_val); +} + +void SynchronizeVsyncTiming(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + const u32 camera_select1 = cmd_buff[1] & 0xFF; + const u32 camera_select2 = cmd_buff[2] & 0xFF; + + cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, frame_rate=%d", cam_select, - frame_rate); + LOG_WARNING(Service_CAM, "(STUBBED) called, camera_select1=%u, camera_select2=%u", + camera_select1, camera_select2); } void GetStereoCameraCalibrationData(Service::Interface* self) { @@ -239,6 +965,67 @@ void GetStereoCameraCalibrationData(Service::Interface* self) { LOG_TRACE(Service_CAM, "called"); } +void SetPackageParameterWithoutContext(Service::Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + PackageParameterWithoutContext package; + std::memcpy(&package, cmd_buff + 1, sizeof(package)); + + cmd_buff[0] = IPC::MakeHeader(0x33, 1, 0); + cmd_buff[1] = RESULT_SUCCESS.raw; + + LOG_WARNING(Service_CAM, "(STUBBED) called"); +} + +template <typename PackageParameterType, int command_id> +static void SetPackageParameter() { + u32* cmd_buff = Kernel::GetCommandBuffer(); + + PackageParameterType package; + std::memcpy(&package, 
cmd_buff + 1, sizeof(package)); + + const CameraSet camera_select(static_cast<u32>(package.camera_select)); + const ContextSet context_select(static_cast<u32>(package.context_select)); + + if (camera_select.IsValid() && context_select.IsValid()) { + for (int camera_id : camera_select) { + CameraConfig& camera = cameras[camera_id]; + for (int context_id : context_select) { + ContextConfig& context = camera.contexts[context_id]; + context.effect = package.effect; + context.flip = package.flip; + context.resolution = package.GetResolution(); + if (context_id == camera.current_context) { + camera.impl->SetEffect(context.effect); + camera.impl->SetFlip(context.flip); + camera.impl->SetResolution(context.resolution); + } + } + } + cmd_buff[1] = RESULT_SUCCESS.raw; + } else { + LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", package.camera_select, + package.context_select); + cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw; + } + + cmd_buff[0] = IPC::MakeHeader(command_id, 1, 0); + + LOG_DEBUG(Service_CAM, "called"); +} + +Resolution PackageParameterWithContext::GetResolution() { + return PRESET_RESOLUTION[static_cast<int>(size)]; +} + +void SetPackageParameterWithContext(Service::Interface* self) { + SetPackageParameter<PackageParameterWithContext, 0x34>(); +} + +void SetPackageParameterWithContextDetail(Service::Interface* self) { + SetPackageParameter<PackageParameterWithContextDetail, 0x35>(); +} + void GetSuitableY2rStandardCoefficient(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); @@ -263,24 +1050,50 @@ void PlayShutterSound(Service::Interface* self) { void DriverInitialize(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - completion_event_cam1->Clear(); - completion_event_cam2->Clear(); - interrupt_error_event->Clear(); - vsync_interrupt_error_event->Clear(); + for (int camera_id = 0; camera_id < NumCameras; ++camera_id) { + CameraConfig& camera = cameras[camera_id]; + camera.current_context = 0; + 
for (int context_id = 0; context_id < 2; ++context_id) { + // Note: the following default values are verified against real 3DS + ContextConfig& context = camera.contexts[context_id]; + context.flip = camera_id == 1 ? Flip::Horizontal : Flip::None; + context.effect = Effect::None; + context.format = OutputFormat::YUV422; + context.resolution = + context_id == 0 ? PRESET_RESOLUTION[5 /*DS_LCD*/] : PRESET_RESOLUTION[0 /*VGA*/]; + } + camera.impl = Camera::CreateCamera(Settings::values.camera_name[camera_id], + Settings::values.camera_config[camera_id]); + camera.impl->SetFlip(camera.contexts[0].flip); + camera.impl->SetEffect(camera.contexts[0].effect); + camera.impl->SetFormat(camera.contexts[0].format); + camera.impl->SetResolution(camera.contexts[0].resolution); + } + + for (PortConfig& port : ports) { + port.Clear(); + } cmd_buff[0] = IPC::MakeHeader(0x39, 1, 0); cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called"); + LOG_DEBUG(Service_CAM, "called"); } void DriverFinalize(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); + CancelReceiving(0); + CancelReceiving(1); + + for (CameraConfig& camera : cameras) { + camera.impl = nullptr; + } + cmd_buff[0] = IPC::MakeHeader(0x3A, 1, 0); cmd_buff[1] = RESULT_SUCCESS.raw; - LOG_WARNING(Service_CAM, "(STUBBED) called"); + LOG_DEBUG(Service_CAM, "called"); } void Init() { @@ -291,21 +1104,28 @@ void Init() { AddService(new CAM_S_Interface); AddService(new CAM_U_Interface); - completion_event_cam1 = - Kernel::Event::Create(ResetType::OneShot, "CAM_U::completion_event_cam1"); - completion_event_cam2 = - Kernel::Event::Create(ResetType::OneShot, "CAM_U::completion_event_cam2"); - interrupt_error_event = - Kernel::Event::Create(ResetType::OneShot, "CAM_U::interrupt_error_event"); - vsync_interrupt_error_event = - Kernel::Event::Create(ResetType::OneShot, "CAM_U::vsync_interrupt_error_event"); + for (PortConfig& port : ports) { + port.completion_event = 
Event::Create(ResetType::Sticky, "CAM_U::completion_event"); + port.buffer_error_interrupt_event = + Event::Create(ResetType::OneShot, "CAM_U::buffer_error_interrupt_event"); + port.vsync_interrupt_event = + Event::Create(ResetType::OneShot, "CAM_U::vsync_interrupt_event"); + } + completion_event_callback = + CoreTiming::RegisterEvent("CAM_U::CompletionEventCallBack", CompletionEventCallBack); } void Shutdown() { - completion_event_cam1 = nullptr; - completion_event_cam2 = nullptr; - interrupt_error_event = nullptr; - vsync_interrupt_error_event = nullptr; + CancelReceiving(0); + CancelReceiving(1); + for (PortConfig& port : ports) { + port.completion_event = nullptr; + port.buffer_error_interrupt_event = nullptr; + port.vsync_interrupt_event = nullptr; + } + for (CameraConfig& camera : cameras) { + camera.impl = nullptr; + } } } // namespace CAM diff --git a/src/core/hle/service/cam/cam.h b/src/core/hle/service/cam/cam.h index c9b6f8acf..f6bff8bc6 100644 --- a/src/core/hle/service/cam/cam.h +++ b/src/core/hle/service/cam/cam.h @@ -13,17 +13,12 @@ namespace Service { namespace CAM { -enum class Port : u8 { None = 0, Cam1 = 1, Cam2 = 2, Both = Cam1 | Cam2 }; +enum CameraIndex { + OuterRightCamera = 0, + InnerCamera = 1, + OuterLeftCamera = 2, -enum class CameraSelect : u8 { - None = 0, - Out1 = 1, - In1 = 2, - Out2 = 4, - In1Out1 = Out1 | In1, - Out1Out2 = Out1 | Out2, - In1Out2 = In1 | Out2, - All = Out1 | In1 | Out2, + NumCameras = 3, }; enum class Effect : u8 { @@ -35,13 +30,6 @@ enum class Effect : u8 { Sepia01 = 5, }; -enum class Context : u8 { - None = 0, - A = 1, - B = 2, - Both = A | B, -}; - enum class Flip : u8 { None = 0, Horizontal = 1, @@ -160,8 +148,23 @@ struct StereoCameraCalibrationData { static_assert(sizeof(StereoCameraCalibrationData) == 64, "StereoCameraCalibrationData structure size is wrong"); -struct PackageParameterCameraSelect { - CameraSelect camera; +/** + * Resolution parameters for the camera. 
+ * The native resolution of 3DS camera is 640 * 480. The captured image will be cropped in the + * region [crop_x0, crop_x1] * [crop_y0, crop_y1], and then scaled to size width * height as the + * output image. Note that all cropping coordinates are inclusive. + */ +struct Resolution { + u16 width; + u16 height; + u16 crop_x0; + u16 crop_y0; + u16 crop_x1; + u16 crop_y1; +}; + +struct PackageParameterWithoutContext { + u8 camera_select; s8 exposure; WhiteBalance white_balance; s8 sharpness; @@ -183,14 +186,43 @@ struct PackageParameterCameraSelect { s16 auto_white_balance_window_height; }; -static_assert(sizeof(PackageParameterCameraSelect) == 28, - "PackageParameterCameraSelect structure size is wrong"); +static_assert(sizeof(PackageParameterWithoutContext) == 28, + "PackageParameterCameraWithoutContext structure size is wrong"); + +struct PackageParameterWithContext { + u8 camera_select; + u8 context_select; + Flip flip; + Effect effect; + Size size; + INSERT_PADDING_BYTES(3); + + Resolution GetResolution(); +}; + +static_assert(sizeof(PackageParameterWithContext) == 8, + "PackageParameterWithContext structure size is wrong"); + +struct PackageParameterWithContextDetail { + u8 camera_select; + u8 context_select; + Flip flip; + Effect effect; + Resolution resolution; + + Resolution GetResolution() { + return resolution; + } +}; + +static_assert(sizeof(PackageParameterWithContextDetail) == 16, + "PackageParameterWithContextDetail structure size is wrong"); /** - * Unknown + * Starts capturing at the selected port. * Inputs: * 0: 0x00010040 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * Outputs: * 0: 0x00010040 * 1: ResultCode @@ -198,10 +230,10 @@ static_assert(sizeof(PackageParameterCameraSelect) == 28, void StartCapture(Service::Interface* self); /** - * Unknown + * Stops capturing from the selected port. 
* Inputs: * 0: 0x00020040 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * Outputs: * 0: 0x00020040 * 1: ResultCode @@ -209,10 +241,33 @@ void StartCapture(Service::Interface* self); void StopCapture(Service::Interface* self); /** + * Gets whether the selected port is currently capturing. + * Inputs: + * 0: 0x00030040 + * 1: u8 selected port + * Outputs: + * 0: 0x00030080 + * 1: ResultCode + * 2: 0 if not capturing, 1 if capturing + */ +void IsBusy(Service::Interface* self); + +/** + * Clears the buffer of selected ports. + * Inputs: + * 0: 0x00040040 + * 1: u8 selected port + * Outputs: + * 0: 0x00040040 + * 1: ResultCode + */ +void ClearBuffer(Service::Interface* self); + +/** * Unknown * Inputs: * 0: 0x00050040 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * Outputs: * 0: 0x00050042 * 1: ResultCode @@ -225,7 +280,7 @@ void GetVsyncInterruptEvent(Service::Interface* self); * Unknown * Inputs: * 0: 0x00060040 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * Outputs: * 0: 0x00060042 * 1: ResultCode @@ -241,9 +296,9 @@ void GetBufferErrorInterruptEvent(Service::Interface* self); * Inputs: * 0: 0x00070102 * 1: Destination address in calling process - * 2: u8 Camera port (`Port` enum) - * 3: Image size (in bytes?) - * 4: u16 Transfer unit size (in bytes?) + * 2: u8 selected port + * 3: Image size (in bytes) + * 4: u16 Transfer unit size (in bytes) * 5: Descriptor: Handle * 6: Handle to destination process * Outputs: @@ -255,21 +310,34 @@ void GetBufferErrorInterruptEvent(Service::Interface* self); void SetReceiving(Service::Interface* self); /** - * Unknown + * Gets whether the selected port finished receiving a frame. + * Inputs: + * 0: 0x00080040 + * 1: u8 selected port + * Outputs: + * 0: 0x00080080 + * 1: ResultCode + * 2: 0 if not finished, 1 if finished + */ +void IsFinishedReceiving(Service::Interface* self); + +/** + * Sets the number of lines the buffer contains. 
* Inputs: * 0: 0x00090100 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * 2: u16 Number of lines to transfer * 3: u16 Width * 4: u16 Height * Outputs: * 0: 0x00090040 * 1: ResultCode + * @todo figure out how the "buffer" actually works. */ void SetTransferLines(Service::Interface* self); /** - * Unknown + * Gets the maximum number of lines that fit in the buffer * Inputs: * 0: 0x000A0080 * 1: u16 Width @@ -277,27 +345,58 @@ void SetTransferLines(Service::Interface* self); * Outputs: * 0: 0x000A0080 * 1: ResultCode - * 2: Maximum number of lines that fit in the buffer(?) + * 2: Maximum number of lines that fit in the buffer + * @todo figure out how the "buffer" actually works. */ void GetMaxLines(Service::Interface* self); /** - * Unknown + * Sets the number of bytes the buffer contains. + * Inputs: + * 0: 0x000B0100 + * 1: u8 selected port + * 2: u16 Number of bytes to transfer + * 3: u16 Width + * 4: u16 Height + * Outputs: + * 0: 0x000B0040 + * 1: ResultCode + * @todo figure out how the "buffer" actually works. + */ +void SetTransferBytes(Service::Interface* self); + +/** + * Gets the number of bytes the buffer contains. * Inputs: * 0: 0x000C0040 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * Outputs: * 0: 0x000C0080 * 1: ResultCode - * 2: Total number of bytes for each frame with current settings(?) + * 2: The number of bytes the buffer contains + * @todo figure out how the "buffer" actually works. */ void GetTransferBytes(Service::Interface* self); /** - * Unknown + * Gets the maximum number of bytes that fit in the buffer. + * Inputs: + * 0: 0x000D0080 + * 1: u16 Width + * 2: u16 Height + * Outputs: + * 0: 0x000D0080 + * 1: ResultCode + * 2: Maximum number of bytes that fit in the buffer + * @todo figure out how the "buffer" actually works. + */ +void GetMaxBytes(Service::Interface* self); + +/** + * Enables or disables trimming. 
* Inputs: * 0: 0x000E0080 - * 1: u8 Camera port (`Port` enum) + * 1: u8 selected port * 2: u8 bool Enable trimming if true * Outputs: * 0: 0x000E0040 @@ -306,14 +405,58 @@ void GetTransferBytes(Service::Interface* self); void SetTrimming(Service::Interface* self); /** - * Unknown + * Gets whether trimming is enabled. + * Inputs: + * 0: 0x000F0040 + * 1: u8 selected port + * Outputs: + * 0: 0x000F0080 + * 1: ResultCode + * 2: u8 bool Enable trimming if true + */ +void IsTrimming(Service::Interface* self); + +/** + * Sets the position to trim. + * Inputs: + * 0: 0x00100140 + * 1: u8 selected port + * 2: x start + * 3: y start + * 4: x end (exclusive) + * 5: y end (exclusive) + * Outputs: + * 0: 0x00100040 + * 1: ResultCode + */ +void SetTrimmingParams(Service::Interface* self); + +/** + * Gets the position to trim. + * Inputs: + * 0: 0x00110040 + * 1: u8 selected port + * + * Outputs: + * 0: 0x00110140 + * 1: ResultCode + * 2: x start + * 3: y start + * 4: x end (exclusive) + * 5: y end (exclusive) + */ +void GetTrimmingParams(Service::Interface* self); + +/** + * Sets the position to trim by giving the width and height. The trimming window is always at the + * center. * Inputs: * 0: 0x00120140 - * 1: u8 Camera port (`Port` enum) - * 2: s16 Trim width(?) - * 3: s16 Trim height(?) - * 4: s16 Camera width(?) - * 5: s16 Camera height(?) + * 1: u8 selected port + * 2: s16 Trim width + * 3: s16 Trim height + * 4: s16 Camera width + * 5: s16 Camera height * Outputs: * 0: 0x00120040 * 1: ResultCode @@ -324,7 +467,7 @@ void SetTrimmingParamsCenter(Service::Interface* self); * Selects up to two physical cameras to enable. * Inputs: * 0: 0x00130040 - * 1: u8 Cameras to activate (`CameraSelect` enum) + * 1: u8 selected camera * Outputs: * 0: 0x00130040 * 1: ResultCode @@ -332,12 +475,24 @@ void SetTrimmingParamsCenter(Service::Interface* self); void Activate(Service::Interface* self); /** - * Unknown + * Switches the context of camera settings. 
+ * Inputs: + * 0: 0x00140080 + * 1: u8 selected camera + * 2: u8 selected context + * Outputs: + * 0: 0x00140040 + * 1: ResultCode + */ +void SwitchContext(Service::Interface* self); + +/** + * Sets flipping of images * Inputs: * 0: 0x001D00C0 - * 1: u8 Camera select (`CameraSelect` enum) + * 1: u8 selected camera * 2: u8 Type of flipping to perform (`Flip` enum) - * 3: u8 Context (`Context` enum) + * 3: u8 selected context * Outputs: * 0: 0x001D0040 * 1: ResultCode @@ -345,12 +500,30 @@ void Activate(Service::Interface* self); void FlipImage(Service::Interface* self); /** - * Unknown + * Sets camera resolution from custom parameters. For more details see the Resolution struct. + * Inputs: + * 0: 0x001E0200 + * 1: u8 selected camera + * 2: width + * 3: height + * 4: crop x0 + * 5: crop y0 + * 6: crop x1 + * 7: crop y1 + * 8: u8 selected context + * Outputs: + * 0: 0x001E0040 + * 1: ResultCode + */ +void SetDetailSize(Service::Interface* self); + +/** + * Sets camera resolution from preset resolution parameters. * Inputs: * 0: 0x001F00C0 - * 1: u8 Camera select (`CameraSelect` enum) + * 1: u8 selected camera * 2: u8 Camera frame resolution (`Size` enum) - * 3: u8 Context id (`Context` enum) + * 3: u8 selected context * Outputs: * 0: 0x001F0040 * 1: ResultCode @@ -358,10 +531,10 @@ void FlipImage(Service::Interface* self); void SetSize(Service::Interface* self); /** - * Unknown + * Sets camera framerate. 
* Inputs: * 0: 0x00200080 - * 1: u8 Camera select (`CameraSelect` enum) + * 1: u8 selected camera * 2: u8 Camera framerate (`FrameRate` enum) * Outputs: * 0: 0x00200040 @@ -370,6 +543,44 @@ void SetSize(Service::Interface* self); void SetFrameRate(Service::Interface* self); /** + * Sets effect on the output image + * Inputs: + * 0: 0x002200C0 + * 1: u8 selected camera + * 2: u8 image effect (`Effect` enum) + * 3: u8 selected context + * Outputs: + * 0: 0x00220040 + * 1: ResultCode + */ +void SetEffect(Service::Interface* self); + +/** + * Sets format of the output image + * Inputs: + * 0: 0x002500C0 + * 1: u8 selected camera + * 2: u8 image format (`OutputFormat` enum) + * 3: u8 selected context + * Outputs: + * 0: 0x00250040 + * 1: ResultCode + */ +void SetOutputFormat(Service::Interface* self); + +/** + * Synchronizes the V-Sync timing of two cameras. + * Inputs: + * 0: 0x00290080 + * 1: u8 selected camera 1 + * 2: u8 selected camera 2 + * Outputs: + * 0: 0x00290040 + * 1: ResultCode + */ +void SynchronizeVsyncTiming(Service::Interface* self); + +/** * Returns calibration data relating the outside cameras to eachother, for use in AR applications. * * Inputs: @@ -382,6 +593,45 @@ void SetFrameRate(Service::Interface* self); void GetStereoCameraCalibrationData(Service::Interface* self); /** + * Batch-configures context-free settings. + * + * Inputs: + * 0: 0x003302C0 + * 1-7: struct PackageParameterWithoutContext + * 8-11: unused + * Outputs: + * 0: 0x00330040 + * 1: ResultCode + */ +void SetPackageParameterWithoutContext(Service::Interface* self); + +/** + * Batch-configures context-related settings with preset resolution parameters. 
+ * + * Inputs: + * 0: 0x00340140 + * 1-2: struct PackageParameterWithContext + * 3-5: unused + * Outputs: + * 0: 0x00340040 + * 1: ResultCode + */ +void SetPackageParameterWithContext(Service::Interface* self); + +/** + * Batch-configures context-related settings with custom resolution parameters + * + * Inputs: + * 0: 0x003501C0 + * 1-4: struct PackageParameterWithContextDetail + * 5-7: unused + * Outputs: + * 0: 0x00350040 + * 1: ResultCode + */ +void SetPackageParameterWithContextDetail(Service::Interface* self); + +/** * Unknown * Inputs: * 0: 0x00360000 diff --git a/src/core/hle/service/cam/cam_u.cpp b/src/core/hle/service/cam/cam_u.cpp index af2123e5b..251c1e6d4 100644 --- a/src/core/hle/service/cam/cam_u.cpp +++ b/src/core/hle/service/cam/cam_u.cpp @@ -11,24 +11,24 @@ namespace CAM { const Interface::FunctionInfo FunctionTable[] = { {0x00010040, StartCapture, "StartCapture"}, {0x00020040, StopCapture, "StopCapture"}, - {0x00030040, nullptr, "IsBusy"}, - {0x00040040, nullptr, "ClearBuffer"}, + {0x00030040, IsBusy, "IsBusy"}, + {0x00040040, ClearBuffer, "ClearBuffer"}, {0x00050040, GetVsyncInterruptEvent, "GetVsyncInterruptEvent"}, {0x00060040, GetBufferErrorInterruptEvent, "GetBufferErrorInterruptEvent"}, {0x00070102, SetReceiving, "SetReceiving"}, - {0x00080040, nullptr, "IsFinishedReceiving"}, + {0x00080040, IsFinishedReceiving, "IsFinishedReceiving"}, {0x00090100, SetTransferLines, "SetTransferLines"}, {0x000A0080, GetMaxLines, "GetMaxLines"}, - {0x000B0100, nullptr, "SetTransferBytes"}, + {0x000B0100, SetTransferBytes, "SetTransferBytes"}, {0x000C0040, GetTransferBytes, "GetTransferBytes"}, - {0x000D0080, nullptr, "GetMaxBytes"}, + {0x000D0080, GetMaxBytes, "GetMaxBytes"}, {0x000E0080, SetTrimming, "SetTrimming"}, - {0x000F0040, nullptr, "IsTrimming"}, - {0x00100140, nullptr, "SetTrimmingParams"}, - {0x00110040, nullptr, "GetTrimmingParams"}, + {0x000F0040, IsTrimming, "IsTrimming"}, + {0x00100140, SetTrimmingParams, "SetTrimmingParams"}, + {0x00110040, 
GetTrimmingParams, "GetTrimmingParams"}, {0x00120140, SetTrimmingParamsCenter, "SetTrimmingParamsCenter"}, {0x00130040, Activate, "Activate"}, - {0x00140080, nullptr, "SwitchContext"}, + {0x00140080, SwitchContext, "SwitchContext"}, {0x00150080, nullptr, "SetExposure"}, {0x00160080, nullptr, "SetWhiteBalance"}, {0x00170080, nullptr, "SetWhiteBalanceWithoutBaseUp"}, @@ -38,18 +38,18 @@ const Interface::FunctionInfo FunctionTable[] = { {0x001B0080, nullptr, "SetAutoWhiteBalance"}, {0x001C0040, nullptr, "IsAutoWhiteBalance"}, {0x001D00C0, FlipImage, "FlipImage"}, - {0x001E0200, nullptr, "SetDetailSize"}, + {0x001E0200, SetDetailSize, "SetDetailSize"}, {0x001F00C0, SetSize, "SetSize"}, {0x00200080, SetFrameRate, "SetFrameRate"}, {0x00210080, nullptr, "SetPhotoMode"}, - {0x002200C0, nullptr, "SetEffect"}, + {0x002200C0, SetEffect, "SetEffect"}, {0x00230080, nullptr, "SetContrast"}, {0x00240080, nullptr, "SetLensCorrection"}, - {0x002500C0, nullptr, "SetOutputFormat"}, + {0x002500C0, SetOutputFormat, "SetOutputFormat"}, {0x00260140, nullptr, "SetAutoExposureWindow"}, {0x00270140, nullptr, "SetAutoWhiteBalanceWindow"}, {0x00280080, nullptr, "SetNoiseFilter"}, - {0x00290080, nullptr, "SynchronizeVsyncTiming"}, + {0x00290080, SynchronizeVsyncTiming, "SynchronizeVsyncTiming"}, {0x002A0080, nullptr, "GetLatestVsyncTiming"}, {0x002B0000, GetStereoCameraCalibrationData, "GetStereoCameraCalibrationData"}, {0x002C0400, nullptr, "SetStereoCameraCalibrationData"}, @@ -59,9 +59,9 @@ const Interface::FunctionInfo FunctionTable[] = { {0x00300080, nullptr, "ReadMcuVariableI2cExclusive"}, {0x00310180, nullptr, "SetImageQualityCalibrationData"}, {0x00320000, nullptr, "GetImageQualityCalibrationData"}, - {0x003302C0, nullptr, "SetPackageParameterWithoutContext"}, - {0x00340140, nullptr, "SetPackageParameterWithContext"}, - {0x003501C0, nullptr, "SetPackageParameterWithContextDetail"}, + {0x003302C0, SetPackageParameterWithoutContext, "SetPackageParameterWithoutContext"}, + {0x00340140, 
SetPackageParameterWithContext, "SetPackageParameterWithContext"}, + {0x003501C0, SetPackageParameterWithContextDetail, "SetPackageParameterWithContextDetail"}, {0x00360000, GetSuitableY2rStandardCoefficient, "GetSuitableY2rStandardCoefficient"}, {0x00370202, nullptr, "PlayShutterSoundWithWave"}, {0x00380040, PlayShutterSound, "PlayShutterSound"}, diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp index 59dd6d1cd..6f13cde27 100644 --- a/src/core/hle/service/cfg/cfg.cpp +++ b/src/core/hle/service/cfg/cfg.cpp @@ -322,47 +322,11 @@ static ResultVal<void*> GetConfigInfoBlockPointer(u32 block_id, u32 size, u32 fl return MakeResult<void*>(pointer); } -/// Checks if the language is available in the chosen region, and returns a proper one -static u8 AdjustLanguageInfoBlock(u32 region, u8 language) { - static const std::array<std::vector<u8>, 7> region_languages{{ - // JPN - {LANGUAGE_JP}, - // USA - {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_ES, LANGUAGE_PT}, - // EUR - {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT, - LANGUAGE_RU}, - // AUS - {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT, - LANGUAGE_RU}, - // CHN - {LANGUAGE_ZH}, - // KOR - {LANGUAGE_KO}, - // TWN - {LANGUAGE_TW}, - }}; - const auto& available = region_languages[region]; - if (std::find(available.begin(), available.end(), language) == available.end()) { - return available[0]; - } - return language; -} - ResultCode GetConfigInfoBlock(u32 block_id, u32 size, u32 flag, void* output) { void* pointer; CASCADE_RESULT(pointer, GetConfigInfoBlockPointer(block_id, size, flag)); memcpy(output, pointer, size); - // override the language setting if the region setting is auto - if (block_id == LanguageBlockID && - Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT) { - u8 language; - memcpy(&language, output, sizeof(u8)); - language = AdjustLanguageInfoBlock(preferred_region_code, language); 
- memcpy(output, &language, sizeof(u8)); - } - return RESULT_SUCCESS; } @@ -586,9 +550,47 @@ void Init() { void Shutdown() {} +/// Checks if the language is available in the chosen region, and returns a proper one +static SystemLanguage AdjustLanguageInfoBlock(u32 region, SystemLanguage language) { + static const std::array<std::vector<SystemLanguage>, 7> region_languages{{ + // JPN + {LANGUAGE_JP}, + // USA + {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_ES, LANGUAGE_PT}, + // EUR + {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT, + LANGUAGE_RU}, + // AUS + {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT, + LANGUAGE_RU}, + // CHN + {LANGUAGE_ZH}, + // KOR + {LANGUAGE_KO}, + // TWN + {LANGUAGE_TW}, + }}; + const auto& available = region_languages[region]; + if (std::find(available.begin(), available.end(), language) == available.end()) { + return available[0]; + } + return language; +} + void SetPreferredRegionCode(u32 region_code) { preferred_region_code = region_code; LOG_INFO(Service_CFG, "Preferred region code set to %u", preferred_region_code); + + if (Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT) { + const SystemLanguage current_language = GetSystemLanguage(); + const SystemLanguage adjusted_language = + AdjustLanguageInfoBlock(region_code, current_language); + if (current_language != adjusted_language) { + LOG_WARNING(Service_CFG, "System language %d does not fit the region. 
Adjusted to %d", + static_cast<int>(current_language), static_cast<int>(adjusted_language)); + SetSystemLanguage(adjusted_language); + } + } } void SetUsername(const std::u16string& name) { diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index a8c1331ed..1457518d4 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -705,6 +705,33 @@ static void ReleaseRight(Interface* self) { LOG_WARNING(Service_GSP, "called"); } +/** + * GSP_GPU::StoreDataCache service function + * + * This Function is a no-op, We aren't emulating the CPU cache any time soon. + * + * Inputs: + * 0 : Header code [0x001F0082] + * 1 : Address + * 2 : Size + * 3 : Value 0, some descriptor for the KProcess Handle + * 4 : KProcess handle + * Outputs: + * 1 : Result of function, 0 on success, otherwise error code + */ +static void StoreDataCache(Interface* self) { + u32* cmd_buff = Kernel::GetCommandBuffer(); + u32 address = cmd_buff[1]; + u32 size = cmd_buff[2]; + u32 process = cmd_buff[4]; + + cmd_buff[0] = IPC::MakeHeader(0x1F, 0x1, 0); + cmd_buff[1] = RESULT_SUCCESS.raw; // No error + + LOG_DEBUG(Service_GSP, "(STUBBED) called address=0x%08X, size=0x%08X, process=0x%08X", address, + size, process); +} + const Interface::FunctionInfo FunctionTable[] = { {0x00010082, WriteHWRegs, "WriteHWRegs"}, {0x00020084, WriteHWRegsWithMask, "WriteHWRegsWithMask"}, @@ -736,7 +763,7 @@ const Interface::FunctionInfo FunctionTable[] = { {0x001C0040, nullptr, "SetLedForceOff"}, {0x001D0040, nullptr, "SetTestCommand"}, {0x001E0080, nullptr, "SetInternalPriorities"}, - {0x001F0082, nullptr, "StoreDataCache"}, + {0x001F0082, StoreDataCache, "StoreDataCache"}, }; GSP_GPU::GSP_GPU() { diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 9afaf79ec..3a32b70aa 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -15,7 +15,7 @@ Values values = {}; void Apply() { - GDBStub::SetServerPort(static_cast<u32>(values.gdbstub_port)); + 
GDBStub::SetServerPort(values.gdbstub_port); GDBStub::ToggleServer(values.use_gdbstub); VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; diff --git a/src/core/settings.h b/src/core/settings.h index e22ce0f16..b6c75531f 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -7,6 +7,7 @@ #include <array> #include <string> #include "common/common_types.h" +#include "core/hle/service/cam/cam.h" namespace Settings { @@ -106,6 +107,10 @@ struct Values { bool enable_audio_stretching; std::string audio_device_id; + // Camera + std::array<std::string, Service::CAM::NumCameras> camera_name; + std::array<std::string, Service::CAM::NumCameras> camera_config; + // Debugging bool use_gdbstub; u16 gdbstub_port; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d55b84ce0..ad984cd94 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,36 +1,30 @@ set(SRCS - renderer_opengl/gl_rasterizer.cpp - renderer_opengl/gl_rasterizer_cache.cpp - renderer_opengl/gl_shader_gen.cpp - renderer_opengl/gl_shader_util.cpp - renderer_opengl/gl_state.cpp - renderer_opengl/renderer_opengl.cpp - debug_utils/debug_utils.cpp clipper.cpp command_processor.cpp + debug_utils/debug_utils.cpp pica.cpp primitive_assembly.cpp rasterizer.cpp renderer_base.cpp + renderer_opengl/gl_rasterizer.cpp + renderer_opengl/gl_rasterizer_cache.cpp + renderer_opengl/gl_shader_gen.cpp + renderer_opengl/gl_shader_util.cpp + renderer_opengl/gl_state.cpp + renderer_opengl/renderer_opengl.cpp shader/shader.cpp shader/shader_interpreter.cpp swrasterizer.cpp + texture/etc1.cpp + texture/texture_decode.cpp vertex_loader.cpp video_core.cpp ) set(HEADERS - debug_utils/debug_utils.h - renderer_opengl/gl_rasterizer.h - renderer_opengl/gl_rasterizer_cache.h - renderer_opengl/gl_resource_manager.h - renderer_opengl/gl_shader_gen.h - renderer_opengl/gl_shader_util.h - renderer_opengl/gl_state.h - renderer_opengl/pica_to_gl.h - renderer_opengl/renderer_opengl.h 
clipper.h command_processor.h + debug_utils/debug_utils.h gpu_debugger.h pica.h pica_state.h @@ -39,10 +33,20 @@ set(HEADERS rasterizer.h rasterizer_interface.h renderer_base.h + renderer_opengl/gl_rasterizer.h + renderer_opengl/gl_rasterizer_cache.h + renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_util.h + renderer_opengl/gl_state.h + renderer_opengl/pica_to_gl.h + renderer_opengl/renderer_opengl.h shader/debug_data.h shader/shader.h shader/shader_interpreter.h swrasterizer.h + texture/etc1.h + texture/texture_decode.h utils.h vertex_loader.h video_core.h diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index c44b3d95a..2d40f7d4f 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -35,6 +35,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" #include "video_core/shader/shader.h" +#include "video_core/texture/texture_decode.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -315,257 +316,6 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() { return ret; } -const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, - bool disable_alpha) { - const unsigned int coarse_x = x & ~7; - const unsigned int coarse_y = y & ~7; - - if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) { - // TODO(neobrain): Fix code design to unify vertical block offsets! - source += coarse_y * info.stride; - } - - // TODO: Assert that width/height are multiples of block dimensions - - switch (info.format) { - case Regs::TextureFormat::RGBA8: { - auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4)); - return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 
255 : res.a())}; - } - - case Regs::TextureFormat::RGB8: { - auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3)); - return {res.r(), res.g(), res.b(), 255}; - } - - case Regs::TextureFormat::RGB5A1: { - auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2)); - return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; - } - - case Regs::TextureFormat::RGB565: { - auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2)); - return {res.r(), res.g(), res.b(), 255}; - } - - case Regs::TextureFormat::RGBA4: { - auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2)); - return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; - } - - case Regs::TextureFormat::IA8: { - const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2); - - if (disable_alpha) { - // Show intensity as red, alpha as green - return {source_ptr[1], source_ptr[0], 0, 255}; - } else { - return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; - } - } - - case Regs::TextureFormat::RG8: { - auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2)); - return {res.r(), res.g(), 0, 255}; - } - - case Regs::TextureFormat::I8: { - const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); - return {*source_ptr, *source_ptr, *source_ptr, 255}; - } - - case Regs::TextureFormat::A8: { - const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); - - if (disable_alpha) { - return {*source_ptr, *source_ptr, *source_ptr, 255}; - } else { - return {0, 0, 0, *source_ptr}; - } - } - - case Regs::TextureFormat::IA4: { - const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1); - - u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); - u8 a = Color::Convert4To8((*source_ptr) & 0xF); - - if (disable_alpha) { - // Show intensity as red, alpha as green - return {i, a, 0, 255}; - } else { - return {i, i, i, a}; - } - } - 
- case Regs::TextureFormat::I4: { - u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); - const u8* source_ptr = source + morton_offset / 2; - - u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); - i = Color::Convert4To8(i); - - return {i, i, i, 255}; - } - - case Regs::TextureFormat::A4: { - u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1); - const u8* source_ptr = source + morton_offset / 2; - - u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); - a = Color::Convert4To8(a); - - if (disable_alpha) { - return {a, a, a, 255}; - } else { - return {0, 0, 0, a}; - } - } - - case Regs::TextureFormat::ETC1: - case Regs::TextureFormat::ETC1A4: { - bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); - - // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles - const int subtile_width = 4; - const int subtile_height = 4; - - int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1); - unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name... 
- - const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 + - coarse_y * subtile_bytes * 4 * (info.width / 8) + - subtile_index * subtile_bytes * 8); - u64 alpha = 0xFFFFFFFFFFFFFFFF; - if (has_alpha) { - alpha = *source_ptr; - source_ptr++; - } - - union ETC1Tile { - // Each of these two is a collection of 16 bits (one per lookup value) - BitField<0, 16, u64> table_subindexes; - BitField<16, 16, u64> negation_flags; - - unsigned GetTableSubIndex(unsigned index) const { - return (table_subindexes >> index) & 1; - } - - bool GetNegationFlag(unsigned index) const { - return ((negation_flags >> index) & 1) == 1; - } - - BitField<32, 1, u64> flip; - BitField<33, 1, u64> differential_mode; - - BitField<34, 3, u64> table_index_2; - BitField<37, 3, u64> table_index_1; - - union { - // delta value + base value - BitField<40, 3, s64> db; - BitField<43, 5, u64> b; - - BitField<48, 3, s64> dg; - BitField<51, 5, u64> g; - - BitField<56, 3, s64> dr; - BitField<59, 5, u64> r; - } differential; - - union { - BitField<40, 4, u64> b2; - BitField<44, 4, u64> b1; - - BitField<48, 4, u64> g2; - BitField<52, 4, u64> g1; - - BitField<56, 4, u64> r2; - BitField<60, 4, u64> r1; - } separate; - - const Math::Vec3<u8> GetRGB(int x, int y) const { - int texel = 4 * x + y; - - if (flip) - std::swap(x, y); - - // Lookup base value - Math::Vec3<int> ret; - if (differential_mode) { - ret.r() = static_cast<int>(differential.r); - ret.g() = static_cast<int>(differential.g); - ret.b() = static_cast<int>(differential.b); - if (x >= 2) { - ret.r() += static_cast<int>(differential.dr); - ret.g() += static_cast<int>(differential.dg); - ret.b() += static_cast<int>(differential.db); - } - ret.r() = Color::Convert5To8(ret.r()); - ret.g() = Color::Convert5To8(ret.g()); - ret.b() = Color::Convert5To8(ret.b()); - } else { - if (x < 2) { - ret.r() = Color::Convert4To8(static_cast<u8>(separate.r1)); - ret.g() = Color::Convert4To8(static_cast<u8>(separate.g1)); - ret.b() = 
Color::Convert4To8(static_cast<u8>(separate.b1)); - } else { - ret.r() = Color::Convert4To8(static_cast<u8>(separate.r2)); - ret.g() = Color::Convert4To8(static_cast<u8>(separate.g2)); - ret.b() = Color::Convert4To8(static_cast<u8>(separate.b2)); - } - } - - // Add modifier - unsigned table_index = - static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); - - static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ - {{2, 8}}, - {{5, 17}}, - {{9, 29}}, - {{13, 42}}, - {{18, 60}}, - {{24, 80}}, - {{33, 106}}, - {{47, 183}}, - }}; - - int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel)); - if (GetNegationFlag(texel)) - modifier *= -1; - - ret.r() = MathUtil::Clamp(ret.r() + modifier, 0, 255); - ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255); - ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255); - - return ret.Cast<u8>(); - } - } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr); - - alpha >>= 4 * ((x & 3) * 4 + (y & 3)); - return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3), - disable_alpha ? (u8)255 : Color::Convert4To8(alpha & 0xF)); - } - - default: - LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format); - DEBUG_ASSERT(false); - return {}; - } -} - -TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, - const Regs::TextureFormat& format) { - TextureInfo info; - info.physical_address = config.GetPhysicalAddress(); - info.width = config.width; - info.height = config.height; - info.format = format; - info.stride = Pica::Regs::NibblesPerPixel(info.format) * info.width / 2; - return info; -} - #ifdef HAVE_PNG // Adapter functions to libpng to write/flush to File::IOFile instances. 
static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { @@ -642,12 +392,12 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { buf = new u8[row_stride * texture_config.height]; for (unsigned y = 0; y < texture_config.height; ++y) { for (unsigned x = 0; x < texture_config.width; ++x) { - TextureInfo info; + Pica::Texture::TextureInfo info; info.width = texture_config.width; info.height = texture_config.height; info.stride = row_stride; info.format = g_state.regs.texture0_format; - Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); + Math::Vec4<u8> texture_color = Pica::Texture::LookupTexture(data, x, y, info); buf[3 * x + y * row_stride] = texture_color.r(); buf[3 * x + y * row_stride + 1] = texture_color.g(); buf[3 * x + y * row_stride + 2] = texture_color.b(); diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 46ea8d9c7..938a2e1b5 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -205,31 +205,6 @@ inline bool IsPicaTracing() { void OnPicaRegWrite(PicaTrace::Write write); std::unique_ptr<PicaTrace> FinishPicaTracing(); -struct TextureInfo { - PAddr physical_address; - int width; - int height; - int stride; - Pica::Regs::TextureFormat format; - - static TextureInfo FromPicaRegister(const Pica::Regs::TextureConfig& config, - const Pica::Regs::TextureFormat& format); -}; - -/** - * Lookup texel located at the given coordinates and return an RGBA vector of its color. - * @param source Source pointer to read data from - * @param s,t Texture coordinates to read from - * @param info TextureInfo object describing the texture setup - * @param disable_alpha This is used for debug widgets which use this method to display textures - * without providing a good way to visualize alpha by themselves. 
If true, this will return 255 for - * the alpha component, and either drop the information entirely or store it in an "unused" color - * channel. - * @todo Eventually we should get rid of the disable_alpha parameter. - */ -const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info, - bool disable_alpha = false); - void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e326f7727..731540b99 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -276,8 +276,11 @@ struct Regs { case TextureFormat::I8: case TextureFormat::A8: case TextureFormat::IA4: - default: // placeholder for yet unknown formats return 2; + + default: // placeholder for yet unknown formats + UNIMPLEMENTED(); + return 0; } } diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 0674eb85e..287d732b5 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -21,6 +21,7 @@ #include "video_core/pica_types.h" #include "video_core/rasterizer.h" #include "video_core/shader/shader.h" +#include "video_core/texture/texture_decode.h" #include "video_core/utils.h" namespace Pica { @@ -579,10 +580,10 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); auto info = - DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); // TODO: Apply the min and mag filters to the texture - texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); + texture_color[i] = Texture::LookupTexture(texture_data, s, t, info); #if PICA_DUMP_TEXTURES DebugUtils::DumpTexture(texture.config, texture_data); #endif diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9dd9ae0fb..071e4ace0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -716,8 +716,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; CachedSurface src_params; src_params.addr = config.GetPhysicalInputAddress(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ef3b06a7b..60380257a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -17,10 +17,10 @@ #include "common/vector_math.h" #include "core/frontend/emu_window.h" #include "core/memory.h" -#include "video_core/debug_utils/debug_utils.h" #include "video_core/pica_state.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/texture/texture_decode.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -172,7 +172,6 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) { - using SurfaceType = CachedSurface::SurfaceType; if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { @@ -340,17 +339,16 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); - Pica::DebugUtils::TextureInfo tex_info; + Pica::Texture::TextureInfo tex_info; tex_info.width = params.width; 
tex_info.height = params.height; - tex_info.stride = - params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format; + tex_info.SetDefaultStride(); tex_info.physical_address = params.addr; for (unsigned y = 0; y < params.height; ++y) { for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture( + tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( texture_src_data, x, params.height - 1 - y, tex_info); } } @@ -513,8 +511,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( const Pica::Regs::FullTextureConfig& config) { - Pica::DebugUtils::TextureInfo info = - Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + + Pica::Texture::TextureInfo info = + Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); CachedSurface params; params.addr = info.physical_address; diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 49806e8c9..92b35dbc0 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -144,6 +144,8 @@ static const BitSet32 persistent_regs = BuildRegSet({ ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Constants ONE, NEGBIT, + // Loop variables + LOOPCOUNT, LOOPINC, }); /// Raw constant for the source register selector that indicates no swizzling is performed @@ -587,7 +589,7 @@ void JitShader::Compile_RSQ(Instruction instr) { void JitShader::Compile_NOP(Instruction instr) {} void JitShader::Compile_END(Instruction instr) { - ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); + ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); ret(); } @@ -839,7 +841,10 @@ void JitShader::Compile(const std::array<u32, 1024>* 
program_code_, FindReturnOffsets(); // The stack pointer is 8 modulo 16 at the entry of a procedure - ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); + // We reserve 16 bytes and assign a dummy value to the first 8 bytes, to catch any potential + // return checks (see Compile_Return) that happen in shader main routine. + ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); + mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); mov(SETUP, ABI_PARAM1); mov(STATE, ABI_PARAM2); diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 29e9875ea..599e43ffd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -12,7 +12,6 @@ #include <xbyak.h> #include "common/bit_set.h" #include "common/common_types.h" -#include "common/x64/emitter.h" #include "video_core/shader/shader.h" using nihstro::Instruction; diff --git a/src/video_core/texture/etc1.cpp b/src/video_core/texture/etc1.cpp new file mode 100644 index 000000000..af60cde1e --- /dev/null +++ b/src/video_core/texture/etc1.cpp @@ -0,0 +1,124 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include "common/bit_field.h" +#include "common/color.h" +#include "common/common_types.h" +#include "common/math_util.h" +#include "common/vector_math.h" +#include "video_core/texture/etc1.h" + +namespace Pica { +namespace Texture { + +namespace { + +constexpr std::array<u8[2], 8> etc1_modifier_table = {{ + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, +}}; + +union ETC1Tile { + u64 raw; + + // Each of these two is a collection of 16 bits (one per lookup value) + BitField<0, 16, u64> table_subindexes; + BitField<16, 16, u64> negation_flags; + + unsigned GetTableSubIndex(unsigned index) const { + return (table_subindexes >> index) & 1; + } + + bool GetNegationFlag(unsigned index) const { + return ((negation_flags >> index) & 1) == 1; + } + + BitField<32, 1, u64> flip; + BitField<33, 1, u64> differential_mode; + + BitField<34, 3, u64> table_index_2; + BitField<37, 3, u64> table_index_1; + + union { + // delta value + base value + BitField<40, 3, s64> db; + BitField<43, 5, u64> b; + + BitField<48, 3, s64> dg; + BitField<51, 5, u64> g; + + BitField<56, 3, s64> dr; + BitField<59, 5, u64> r; + } differential; + + union { + BitField<40, 4, u64> b2; + BitField<44, 4, u64> b1; + + BitField<48, 4, u64> g2; + BitField<52, 4, u64> g1; + + BitField<56, 4, u64> r2; + BitField<60, 4, u64> r1; + } separate; + + const Math::Vec3<u8> GetRGB(unsigned int x, unsigned int y) const { + int texel = 4 * x + y; + + if (flip) + std::swap(x, y); + + // Lookup base value + Math::Vec3<int> ret; + if (differential_mode) { + ret.r() = static_cast<int>(differential.r); + ret.g() = static_cast<int>(differential.g); + ret.b() = static_cast<int>(differential.b); + if (x >= 2) { + ret.r() += static_cast<int>(differential.dr); + ret.g() += static_cast<int>(differential.dg); + ret.b() += static_cast<int>(differential.db); + } + ret.r() = Color::Convert5To8(ret.r()); + ret.g() = Color::Convert5To8(ret.g()); + ret.b() = 
Color::Convert5To8(ret.b()); + } else { + if (x < 2) { + ret.r() = Color::Convert4To8(static_cast<u8>(separate.r1)); + ret.g() = Color::Convert4To8(static_cast<u8>(separate.g1)); + ret.b() = Color::Convert4To8(static_cast<u8>(separate.b1)); + } else { + ret.r() = Color::Convert4To8(static_cast<u8>(separate.r2)); + ret.g() = Color::Convert4To8(static_cast<u8>(separate.g2)); + ret.b() = Color::Convert4To8(static_cast<u8>(separate.b2)); + } + } + + // Add modifier + unsigned table_index = + static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); + + int modifier = etc1_modifier_table[table_index][GetTableSubIndex(texel)]; + if (GetNegationFlag(texel)) + modifier *= -1; + + ret.r() = MathUtil::Clamp(ret.r() + modifier, 0, 255); + ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255); + ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255); + + return ret.Cast<u8>(); + } +}; + +} // anonymous namespace + +Math::Vec3<u8> SampleETC1Subtile(u64 value, unsigned int x, unsigned int y) { + ETC1Tile tile{value}; + return tile.GetRGB(x, y); +} + +} // namespace Texture +} // namespace Pica diff --git a/src/video_core/texture/etc1.h b/src/video_core/texture/etc1.h new file mode 100644 index 000000000..e188b19df --- /dev/null +++ b/src/video_core/texture/etc1.h @@ -0,0 +1,16 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" +#include "common/vector_math.h" + +namespace Pica { +namespace Texture { + +Math::Vec3<u8> SampleETC1Subtile(u64 value, unsigned int x, unsigned int y); + +} // namespace Texture +} // namespace Pica diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp new file mode 100644 index 000000000..f611a1aa9 --- /dev/null +++ b/src/video_core/texture/texture_decode.cpp @@ -0,0 +1,229 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/assert.h" +#include "common/color.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/swap.h" +#include "common/vector_math.h" +#include "video_core/pica.h" +#include "video_core/texture/etc1.h" +#include "video_core/texture/texture_decode.h" +#include "video_core/utils.h" + +using TextureFormat = Pica::Regs::TextureFormat; + +namespace Pica { +namespace Texture { + +constexpr size_t TILE_SIZE = 8 * 8; +constexpr size_t ETC1_SUBTILES = 2 * 2; + +size_t CalculateTileSize(TextureFormat format) { + switch (format) { + case TextureFormat::RGBA8: + return 4 * TILE_SIZE; + + case TextureFormat::RGB8: + return 3 * TILE_SIZE; + + case TextureFormat::RGB5A1: + case TextureFormat::RGB565: + case TextureFormat::RGBA4: + case TextureFormat::IA8: + case TextureFormat::RG8: + return 2 * TILE_SIZE; + + case TextureFormat::I8: + case TextureFormat::A8: + case TextureFormat::IA4: + return 1 * TILE_SIZE; + + case TextureFormat::I4: + case TextureFormat::A4: + return TILE_SIZE / 2; + + case TextureFormat::ETC1: + return ETC1_SUBTILES * 8; + + case TextureFormat::ETC1A4: + return ETC1_SUBTILES * 16; + + default: // placeholder for yet unknown formats + UNIMPLEMENTED(); + return 0; + } +} + +Math::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y, + const TextureInfo& info, bool disable_alpha) { 
+ // Coordinate in tiles + const unsigned int coarse_x = x / 8; + const unsigned int coarse_y = y / 8; + + // Coordinate inside the tile + const unsigned int fine_x = x % 8; + const unsigned int fine_y = y % 8; + + const u8* line = source + coarse_y * info.stride; + const u8* tile = line + coarse_x * CalculateTileSize(info.format); + return LookupTexelInTile(tile, fine_x, fine_y, info, disable_alpha); +} + +Math::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y, + const TextureInfo& info, bool disable_alpha) { + DEBUG_ASSERT(x < 8); + DEBUG_ASSERT(y < 8); + + using VideoCore::MortonInterleave; + + switch (info.format) { + case Regs::TextureFormat::RGBA8: { + auto res = Color::DecodeRGBA8(source + MortonInterleave(x, y) * 4); + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; + } + + case Regs::TextureFormat::RGB8: { + auto res = Color::DecodeRGB8(source + MortonInterleave(x, y) * 3); + return {res.r(), res.g(), res.b(), 255}; + } + + case Regs::TextureFormat::RGB5A1: { + auto res = Color::DecodeRGB5A1(source + MortonInterleave(x, y) * 2); + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())}; + } + + case Regs::TextureFormat::RGB565: { + auto res = Color::DecodeRGB565(source + MortonInterleave(x, y) * 2); + return {res.r(), res.g(), res.b(), 255}; + } + + case Regs::TextureFormat::RGBA4: { + auto res = Color::DecodeRGBA4(source + MortonInterleave(x, y) * 2); + return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 
255 : res.a())}; + } + + case Regs::TextureFormat::IA8: { + const u8* source_ptr = source + MortonInterleave(x, y) * 2; + + if (disable_alpha) { + // Show intensity as red, alpha as green + return {source_ptr[1], source_ptr[0], 0, 255}; + } else { + return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; + } + } + + case Regs::TextureFormat::RG8: { + auto res = Color::DecodeRG8(source + MortonInterleave(x, y) * 2); + return {res.r(), res.g(), 0, 255}; + } + + case Regs::TextureFormat::I8: { + const u8* source_ptr = source + MortonInterleave(x, y); + return {*source_ptr, *source_ptr, *source_ptr, 255}; + } + + case Regs::TextureFormat::A8: { + const u8* source_ptr = source + MortonInterleave(x, y); + + if (disable_alpha) { + return {*source_ptr, *source_ptr, *source_ptr, 255}; + } else { + return {0, 0, 0, *source_ptr}; + } + } + + case Regs::TextureFormat::IA4: { + const u8* source_ptr = source + MortonInterleave(x, y); + + u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); + u8 a = Color::Convert4To8((*source_ptr) & 0xF); + + if (disable_alpha) { + // Show intensity as red, alpha as green + return {i, a, 0, 255}; + } else { + return {i, i, i, a}; + } + } + + case Regs::TextureFormat::I4: { + u32 morton_offset = MortonInterleave(x, y); + const u8* source_ptr = source + morton_offset / 2; + + u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); + i = Color::Convert4To8(i); + + return {i, i, i, 255}; + } + + case Regs::TextureFormat::A4: { + u32 morton_offset = MortonInterleave(x, y); + const u8* source_ptr = source + morton_offset / 2; + + u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF); + a = Color::Convert4To8(a); + + if (disable_alpha) { + return {a, a, a, 255}; + } else { + return {0, 0, 0, a}; + } + } + + case Regs::TextureFormat::ETC1: + case Regs::TextureFormat::ETC1A4: { + bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4); + size_t subtile_size = has_alpha ? 
16 : 8; + + // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles + constexpr unsigned int subtile_width = 4; + constexpr unsigned int subtile_height = 4; + + unsigned int subtile_index = (x / subtile_width) + 2 * (y / subtile_height); + x %= subtile_width; + y %= subtile_height; + + const u8* subtile_ptr = source + subtile_index * subtile_size; + + u8 alpha = 255; + if (has_alpha) { + u64_le packed_alpha; + memcpy(&packed_alpha, subtile_ptr, sizeof(u64)); + subtile_ptr += sizeof(u64); + + alpha = Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF); + } + + u64_le subtile_data; + memcpy(&subtile_data, subtile_ptr, sizeof(u64)); + + return Math::MakeVec(SampleETC1Subtile(subtile_data, x, y), + disable_alpha ? (u8)255 : alpha); + } + + default: + LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format); + DEBUG_ASSERT(false); + return {}; + } +} + +TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, + const Regs::TextureFormat& format) { + TextureInfo info; + info.physical_address = config.GetPhysicalAddress(); + info.width = config.width; + info.height = config.height; + info.format = format; + info.SetDefaultStride(); + return info; +} + +} // namespace Texture +} // namespace Pica diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h new file mode 100644 index 000000000..5c636939a --- /dev/null +++ b/src/video_core/texture/texture_decode.h @@ -0,0 +1,60 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "common/vector_math.h" +#include "video_core/pica.h" + +namespace Pica { +namespace Texture { + +/// Returns the byte size of a 8*8 tile of the specified texture format. 
+size_t CalculateTileSize(Pica::Regs::TextureFormat format); + +struct TextureInfo { + PAddr physical_address; + unsigned int width; + unsigned int height; + ptrdiff_t stride; + Pica::Regs::TextureFormat format; + + static TextureInfo FromPicaRegister(const Pica::Regs::TextureConfig& config, + const Pica::Regs::TextureFormat& format); + + /// Calculates stride from format and width, assuming that the entire texture is contiguous. + void SetDefaultStride() { + stride = Pica::Texture::CalculateTileSize(format) * (width / 8); + } +}; + +/** + * Lookup texel located at the given coordinates and return an RGBA vector of its color. + * @param source Source pointer to read data from + * @param x,y Texture coordinates to read from + * @param info TextureInfo object describing the texture setup + * @param disable_alpha This is used for debug widgets which use this method to display textures + * without providing a good way to visualize alpha by themselves. If true, this will return 255 for + * the alpha component, and either drop the information entirely or store it in an "unused" color + * channel. + * @todo Eventually we should get rid of the disable_alpha parameter. + */ +Math::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y, + const TextureInfo& info, bool disable_alpha = false); + +/** + * Looks up a texel from a single 8x8 texture tile. + * + * @param source Pointer to the beginning of the tile. + * @param x, y In-tile coordinates to read from. Must be < 8. + * @param info TextureInfo describing the texture format. + * @param disable_alpha Used for debugging. Sets the result alpha to 255 and either discards the + * real alpha or inserts it in an otherwise unused channel. + */ +Math::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y, + const TextureInfo& info, bool disable_alpha); + +} // namespace Texture +} // namespace Pica |