diff --git a/FEXCore/Source/Interface/Core/CPUBackend.cpp b/FEXCore/Source/Interface/Core/CPUBackend.cpp
index b9ecca4332..629b490ca8 100644
--- a/FEXCore/Source/Interface/Core/CPUBackend.cpp
+++ b/FEXCore/Source/Interface/Core/CPUBackend.cpp
@@ -39,6 +39,12 @@ namespace CPU {
     {0xC90F'DAA2'2168'C235ULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_PI
     {0x9A20'9A84'FBCF'F799ULL, 0x0000'0000'0000'3FFDULL}, // NAMED_VECTOR_X87_LOG10_2
     {0xB172'17F7'D1CF'79ACULL, 0x0000'0000'0000'3FFEULL}, // NAMED_VECTOR_X87_LOG_2
+    {0x4F00'0000'4F00'0000ULL, 0x4F00'0000'4F00'0000ULL}, // NAMED_VECTOR_CVTMAX_F32_I32: 2^31 encoded as binary32 in every 32-bit lane
+    {0x5F00'0000'5F00'0000ULL, 0x5F00'0000'5F00'0000ULL}, // NAMED_VECTOR_CVTMAX_F32_I64: 2^63 encoded as binary32 in every 32-bit lane
+    {0x41E0'0000'0000'0000ULL, 0x41E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I32: 2^31 encoded as binary64 in every 64-bit lane
+    {0x43E0'0000'0000'0000ULL, 0x43E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I64: 2^63 encoded as binary64 in every 64-bit lane
+    {0x8000'0000'8000'0000ULL, 0x8000'0000'8000'0000ULL}, // NAMED_VECTOR_CVTMAX_I32: 0x80000000 (only the sign bit set) in every 32-bit lane
+    {0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_I64: 0x8000000000000000 (only the sign bit set) in every 64-bit lane
   };
 
   constexpr static auto PSHUFLW_LUT {[]() consteval {
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 9b08ea64f4..1c477988a5 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -5089,9 +5089,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
     {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>},
     {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>},
 
-    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<OpSize::i32Bit, false>},
-    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, true>},
-    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
+    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
+    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
+    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
 
     {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
     {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>},
@@ -5191,9 +5191,9 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
     {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::VPMULHWOp<false>},
     {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::VPMULHWOp<true>},
 
-    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i64Bit, true, false>},
-    {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<OpSize::i32Bit, true>},
-    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i64Bit, true, true>},
+    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
+    {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, true>},
+    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
 
     {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::MOVVectorNTOp},
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
index 2fee162807..4c88a19f85 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -469,10 +469,10 @@ class OpDispatchBuilder final : public IREmitter {
   template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
   void Scalar_CVT_Float_To_Float(OpcodeArgs);
   void Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, bool IsAVX);
-  template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
+  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
   void Vector_CVT_Float_To_Int(OpcodeArgs);
   void MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
-  template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
+  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
   void XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs);
   void MASKMOVOp(OpcodeArgs);
   void MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType);
@@ -518,12 +518,6 @@ class OpDispatchBuilder final : public IREmitter {
   template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
   void AVXScalar_CVT_Float_To_Float(OpcodeArgs);
 
-  template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
-  void AVXVector_CVT_Float_To_Int(OpcodeArgs);
-
-  template<IR::OpSize SrcElementSize, bool Widen>
-  void AVXVector_CVT_Int_To_Float(OpcodeArgs);
-
   template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
   void VectorScalarInsertALUOp(OpcodeArgs);
   template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
@@ -1032,7 +1026,7 @@ class OpDispatchBuilder final : public IREmitter {
   template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
   void AVX128_Vector_CVT_Float_To_Float(OpcodeArgs);
 
-  template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
+  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
   void AVX128_Vector_CVT_Float_To_Int(OpcodeArgs);
 
   template<IR::OpSize SrcElementSize, bool Widen>
@@ -1471,7 +1465,10 @@ class OpDispatchBuilder final : public IREmitter {
   Ref Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
                                     const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);
 
-  Ref Vector_CVT_Float_To_IntImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode);
+  Ref CVTFPR_To_GPRImpl(OpcodeArgs, Ref Src, IR::OpSize SrcElementSize, bool HostRoundingMode);
+
+  Ref Vector_CVT_Float_To_Int32Impl(OpcodeArgs, IR::OpSize DstSize, Ref Src, IR::OpSize SrcSize, IR::OpSize SrcElementSize,
+                                    bool HostRoundingMode, bool ZeroUpperHalf);
 
   Ref Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen);
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
index 5bcf58d12a..f8f32c62a3 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -116,8 +116,8 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
     {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>},
 
     {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
-    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, true>},
-    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
+    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
+    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
 
     {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i32Bit>},
     {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i64Bit>},
@@ -217,9 +217,9 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
     {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::AVX128_VPMULHW<false>},
     {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::AVX128_VPMULHW<true>},
 
-    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, false>},
+    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
     {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float<OpSize::i32Bit, true>},
-    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, true>},
+    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
 
     {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},
 
@@ -1058,18 +1058,8 @@ void OpDispatchBuilder::AVX128_CVTFPR_To_GPR(OpcodeArgs) {
     Src.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSizeFromSrc(Op), Op->Flags);
   }
 
-  // GPR size is determined by REX.W
-  // Source Element size is determined by instruction
-  const auto GPRSize = OpSizeFromDst(Op);
-
-  Ref Result {};
-  if constexpr (HostRoundingMode) {
-    Result = _Float_ToGPR_S(GPRSize, SrcElementSize, Src.Low);
-  } else {
-    Result = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src.Low);
-  }
-
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid);
+  Ref Result = CVTFPR_To_GPRImpl(Op, Src.Low, SrcElementSize, HostRoundingMode);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AVX128_VANDN(OpcodeArgs) {
@@ -1604,7 +1594,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
+template<IR::OpSize SrcElementSize, bool HostRoundingMode>
 void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
   const auto SrcSize = GetSrcSize(Op);
 
@@ -1614,48 +1604,22 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
   auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128BitSrc);
   RefPair Result {};
 
-  if (SrcElementSize == OpSize::i64Bit && Narrow) {
-    ///< Special case for VCVTPD2DQ/CVTTPD2DQ because it has weird rounding requirements.
-    Result.Low = _Vector_F64ToI32(OpSize::i128Bit, Src.Low, HostRoundingMode ? Round_Host : Round_Towards_Zero, Is128BitSrc);
-
-    if (!Is128BitSrc) {
-      // Also convert the upper 128-bit lane
-      auto ResultHigh = _Vector_F64ToI32(OpSize::i128Bit, Src.High, HostRoundingMode ? Round_Host : Round_Towards_Zero, false);
-
-      // Zip the two halves together in to the lower 128-bits
-      Result.Low = _VZip(OpSize::i128Bit, OpSize::i64Bit, Result.Low, ResultHigh);
-    }
+  Result.Low = Vector_CVT_Float_To_Int32Impl(Op, OpSize::i128Bit, Src.Low, OpSize::i128Bit, SrcElementSize, HostRoundingMode, Is128BitSrc); // lower 128-bit lane; ZeroUpperHalf is only requested for 128-bit sources
+  if (Is128BitSrc) {
+    // 128-bit source: there is no upper lane to convert, so zero the upper 128-bit lane of the result.
+    Result = AVX128_Zext(Result.Low);
   } else {
-    auto Convert = [this](Ref Src) -> Ref {
-      auto ElementSize = SrcElementSize;
-      if (Narrow) {
-        ElementSize = ElementSize >> 1;
-        Src = _Vector_FToF(OpSize::i128Bit, ElementSize, Src, SrcElementSize);
-      }
-
-      if (HostRoundingMode) {
-        return _Vector_FToS(OpSize::i128Bit, ElementSize, Src);
-      } else {
-        return _Vector_FToZS(OpSize::i128Bit, ElementSize, Src);
-      }
-    };
-
-    Result.Low = Convert(Src.Low);
+    Result.High = Vector_CVT_Float_To_Int32Impl(Op, OpSize::i128Bit, Src.High, OpSize::i128Bit, SrcElementSize, HostRoundingMode, false);
+    // The upper 128-bit lane has been converted above; 64-bit sources additionally need both halves merged.
+    if (SrcElementSize == OpSize::i64Bit) {
+      // Zip the 64-bit elements of the two converted halves together into the lower 128 bits
+      Result.Low = _VZip(OpSize::i128Bit, OpSize::i64Bit, Result.Low, Result.High);
 
-    if (!Is128BitSrc) {
-      if (!Narrow) {
-        Result.High = Convert(Src.High);
-      } else {
-        Result.Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, Result.Low, Convert(Src.High));
-      }
+      // All results now live in the lower lane: zero the upper 128-bit lane of the result.
+      Result = AVX128_Zext(Result.Low);
     }
   }
 
-  if (Narrow || Is128BitSrc) {
-    // Zero the upper 128-bit lane of the result.
-    Result = AVX128_Zext(Result.Low);
-  }
-
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
index 7f986b177f..044d434037 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
@@ -7,7 +7,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0x0C, 1, &OpDispatchBuilder::PI2FWOp},
   {0x0D, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
   {0x1C, 1, &OpDispatchBuilder::PF2IWOp},
-  {0x1D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
+  {0x1D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
 
   {0x86, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>},
   {0x87, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>},
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
index 7cb9978ffb..ce0eb83dcd 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
@@ -57,8 +57,8 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp},
   {0x2A, 1, &OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float},
   {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
-  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
-  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, true>},
+  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
+  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
   {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
   {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i32Bit>},
   {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, OpSize::i32Bit>},
@@ -161,7 +161,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>},
   {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>},
   {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>},
-  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
+  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
   {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>},
   {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>},
   {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>},
@@ -200,7 +200,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<OpSize::i32Bit>},
   {0xD6, 1, &OpDispatchBuilder::MOVQ2DQ<false>},
   {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp<OpSize::i64Bit>},
-  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true, true>},
+  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
   {0xF0, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
 };
 
@@ -213,8 +213,8 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp},
   {0x2A, 1, &OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float},
   {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
-  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, false>},
-  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, true>},
+  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
+  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
   {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},
 
   {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i64Bit>},
@@ -226,7 +226,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i64Bit>},
   {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i64Bit>},
   {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit, false>},
-  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, true>},
+  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
   {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>},
   {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i64Bit>},
   {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, OpSize::i64Bit>},
@@ -284,7 +284,7 @@ constexpr std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDisp
   {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>},
   {0xE4, 1, &OpDispatchBuilder::PMULHW<false>},
   {0xE5, 1, &OpDispatchBuilder::PMULHW<true>},
-  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true, false>},
+  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
   {0xE7, 1, &OpDispatchBuilder::MOVVectorNTOp},
   {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>},
   {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>},
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index b0a2ea53fc..b2bdcdf9a0 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -2067,6 +2067,24 @@ void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs) {
 template void OpDispatchBuilder::AVXCVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
 template void OpDispatchBuilder::AVXCVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);
 
+Ref OpDispatchBuilder::CVTFPR_To_GPRImpl(OpcodeArgs, Ref Src, IR::OpSize SrcElementSize, bool HostRoundingMode) { // Shared float -> GPR integer conversion; returns the result Ref, the caller stores it.
+  // The GPR (destination) size is taken from the decoded destination operand; REX.W selects it.
+  // The source element size is fixed by the instruction and arrives as SrcElementSize.
+  const auto GPRSize = OpSizeFromDst(Op);
+
+  if (HostRoundingMode) { // Round with the host rounding mode up front, so the conversion below is always the ZS variant.
+    Src = _Vector_FToI(SrcElementSize, SrcElementSize, Src, Round_Host);
+  }
+  Ref Converted = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src);
+
+  bool Dst32 = GPRSize == OpSize::i32Bit;
+  Ref MaxI = Dst32 ? _Constant(0x80000000) : _Constant(0x8000000000000000); // Fallback result: only the sign bit of the destination width set.
+  Ref MaxF = LoadAndCacheNamedVectorConstant(SrcElementSize, (SrcElementSize == OpSize::i32Bit) ?
+                                                               (Dst32 ? NAMED_VECTOR_CVTMAX_F32_I32 : NAMED_VECTOR_CVTMAX_F32_I64) :
+                                                               (Dst32 ? NAMED_VECTOR_CVTMAX_F64_I32 : NAMED_VECTOR_CVTMAX_F64_I64)); // Float bound (2^31 or 2^63) in the source format, picked by source and destination width.
+  return _Select(GPRSize, SrcElementSize, CondClassType {FEXCore::IR::COND_FGT}, MaxF, Src, Converted, MaxI); // NOTE(review): presumably yields Converted when MaxF > Src and MaxI otherwise — confirm _Select operand order.
+}
+
 template<IR::OpSize SrcElementSize, bool HostRoundingMode>
 void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) {
   // If loading a vector, use the full size, so we don't
@@ -2074,18 +2092,8 @@ void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) {
   // memory, then we want to load the element size exactly.
   const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op);
   Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
-
-  // GPR size is determined by REX.W
-  // Source Element size is determined by instruction
-  const auto GPRSize = OpSizeFromDst(Op);
-
-  if constexpr (HostRoundingMode) {
-    Src = _Float_ToGPR_S(GPRSize, SrcElementSize, Src);
-  } else {
-    Src = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src);
-  }
-
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, GPRSize, OpSize::iInvalid);
+  Ref Result = CVTFPR_To_GPRImpl(Op, Src, SrcElementSize, HostRoundingMode); // rounding, conversion and the fallback select all live in the shared helper
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }
 
 template void OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>(OpcodeArgs);
@@ -2127,77 +2135,43 @@ void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs) {
 template void OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, true>(OpcodeArgs);
 template void OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>(OpcodeArgs);
 
-template<IR::OpSize SrcElementSize, bool Widen>
-void OpDispatchBuilder::AVXVector_CVT_Int_To_Float(OpcodeArgs) {
-  Ref Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen);
-  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
-}
-
-template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<OpSize::i32Bit, false>(OpcodeArgs);
-template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<OpSize::i32Bit, true>(OpcodeArgs);
-
-Ref OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode) {
-  const auto DstSize = OpSizeFromDst(Op);
-  auto ElementSize = SrcElementSize;
-
-  Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
-
-  if (Narrow) {
-    Src = _Vector_FToF(DstSize, SrcElementSize >> 1, Src, SrcElementSize);
-    ElementSize = ElementSize >> 1;
-  }
-
+Ref OpDispatchBuilder::Vector_CVT_Float_To_Int32Impl(OpcodeArgs, IR::OpSize DstSize, Ref Src, IR::OpSize SrcSize, IR::OpSize SrcElementSize,
+                                                     bool HostRoundingMode, bool ZeroUpperHalf) { // Shared packed f32/f64 -> i32 conversion; returns the converted vector Ref, the caller stores it.
   if (HostRoundingMode) {
-    return _Vector_FToS(DstSize, ElementSize, Src);
-  } else {
-    return _Vector_FToZS(DstSize, ElementSize, Src);
+    Src = _Vector_FToI(SrcSize, SrcElementSize, Src, Round_Host); // Round with the host rounding mode first; both conversions below use round-towards-zero forms.
   }
-}
 
-template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
-void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) {
-  const auto DstSize = OpSizeFromDst(Op);
+  OpSize OverflowConstSize = ZeroUpperHalf && SrcElementSize == OpSize::i64Bit ? DstSize / 2 : DstSize; // NOTE(review): half-width load for narrowing f64 sources, presumably so MaxI's upper half stays zero — confirm.
+  Ref MaxI = LoadAndCacheNamedVectorConstant(OverflowConstSize, NAMED_VECTOR_CVTMAX_I32); // Fallback lanes: 0x80000000 per 32-bit element.
+  Ref Converted {}, Cmp {};
+  if (SrcElementSize == OpSize::i64Bit) {
+    Ref MaxF = LoadAndCacheNamedVectorConstant(SrcSize, NAMED_VECTOR_CVTMAX_F64_I32); // 2^31 as binary64, loaded at the source vector size.
+    Converted = _Vector_F64ToI32(DstSize, Src, Round_Towards_Zero, ZeroUpperHalf);
 
-  Ref Result {};
-  if (SrcElementSize == OpSize::i64Bit && Narrow) {
-    ///< Special case for CVTTPD2DQ because it has weird rounding requirements.
-    Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
-    Result = _Vector_F64ToI32(DstSize, Src, HostRoundingMode ? Round_Host : Round_Towards_Zero, true);
+    Cmp = _VFCMPGT(SrcSize, OpSize::i64Bit, MaxF, Src); // Per-element compare of MaxF against Src at the 64-bit source element size.
+    Cmp = _VUShrNI(DstSize, OpSize::i64Bit, Cmp, 32); // NOTE(review): presumably narrows each 64-bit mask element to 32 bits to line up with Converted — confirm.
   } else {
-    Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode);
+    Ref MaxF = LoadAndCacheNamedVectorConstant(DstSize, NAMED_VECTOR_CVTMAX_F32_I32); // 2^31 as binary32 in every lane.
+    Converted = _Vector_FToZS(DstSize, OpSize::i32Bit, Src);
+    Cmp = _VFCMPGT(DstSize, OpSize::i32Bit, MaxF, Src); // Per-element compare of MaxF against Src; source and result elements are both 32-bit here.
   }
-
-  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
+  return _VBSL(DstSize, Cmp, Converted, MaxI); // NOTE(review): bitwise select on Cmp — presumably Converted where the mask is set, MaxI elsewhere; confirm operand order.
 }
 
-template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>(OpcodeArgs);
-template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, true>(OpcodeArgs);
-template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true, false>(OpcodeArgs);
-
-template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true, true>(OpcodeArgs);
-template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true, false>(OpcodeArgs);
-
-template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
-void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) {
+template<IR::OpSize SrcElementSize, bool HostRoundingMode>
+void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) { // Packed float -> packed i32 handler; the dispatch tables in this patch register it for both the legacy and the VEX encodings.
   const auto DstSize = OpSizeFromDst(Op);
 
-  Ref Result {};
-  if (SrcElementSize == OpSize::i64Bit && Narrow) {
-    ///< Special case for CVTPD2DQ/CVTTPD2DQ because it has weird rounding requirements.
-    Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
-    Result = _Vector_F64ToI32(DstSize, Src, HostRoundingMode ? Round_Host : Round_Towards_Zero, true);
-  } else {
-    Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode);
-  }
-
+  Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
+  Ref Result = Vector_CVT_Float_To_Int32Impl(Op, DstSize, Src, OpSizeFromSrc(Op), SrcElementSize, HostRoundingMode, true); // final argument is ZeroUpperHalf
   StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
 }
 
-template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, false>(OpcodeArgs);
-template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, true>(OpcodeArgs);
+template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>(OpcodeArgs);
+template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>(OpcodeArgs);
 
-template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i64Bit, true, false>(OpcodeArgs);
-template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i64Bit, true, true>(OpcodeArgs);
+template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>(OpcodeArgs);
+template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>(OpcodeArgs);
 
 Ref OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
                                                      const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) {
@@ -2277,7 +2251,7 @@ void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
   StoreResult(FPRClass, Op, Src, OpSize::iInvalid);
 }
 
-template<IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode>
+template<IR::OpSize SrcElementSize, bool HostRoundingMode>
 void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
   // This function causes a change in MMX state from X87 to MMX
   if (MMXState == MMXState_X87) {
@@ -2288,29 +2262,16 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
   // unnecessarily zero extend the vector. Otherwise, if
   // memory, then we want to load the element size exactly.
   const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op);
+  const auto DstSize = OpSizeFromDst(Op); // used both for the conversion and for the store below
   Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
-
-  auto ElementSize = SrcElementSize;
-  const auto Size = OpSizeFromDst(Op);
-
-  if (Narrow) {
-    Src = _Vector_FToF(Size, SrcElementSize >> 1, Src, SrcElementSize);
-    ElementSize = ElementSize >> 1;
-  }
-
-  if constexpr (HostRoundingMode) {
-    Src = _Vector_FToS(Size, ElementSize, Src);
-  } else {
-    Src = _Vector_FToZS(Size, ElementSize, Src);
-  }
-
-  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid);
+  Ref Result = Vector_CVT_Float_To_Int32Impl(Op, DstSize, Src, SrcSize, SrcElementSize, HostRoundingMode, false /* ZeroUpperHalf; TODO: confirm whether the MMX destination ever needs this set */);
+  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
 }
 
-template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>(OpcodeArgs);
-template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false, true>(OpcodeArgs);
-template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, false>(OpcodeArgs);
-template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true, true>(OpcodeArgs);
+template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false>(OpcodeArgs);
+template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, true>(OpcodeArgs);
+template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, false>(OpcodeArgs);
+template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true>(OpcodeArgs);
 
 void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) {
   const auto Size = OpSizeFromSrc(Op);
diff --git a/FEXCore/Source/Interface/IR/IRDumper.cpp b/FEXCore/Source/Interface/IR/IRDumper.cpp
index b4a94aa1b8..c1fdfa5e0d 100644
--- a/FEXCore/Source/Interface/IR/IRDumper.cpp
+++ b/FEXCore/Source/Interface/IR/IRDumper.cpp
@@ -209,6 +209,18 @@ static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView
         return "x87_log10_2";
       case NamedVectorConstant::NAMED_VECTOR_X87_LOG_2:
         return "x87_log2";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F32_I32:
+        return "cvtmax_f32_i32";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F32_I64:
+        return "cvtmax_f32_i64";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F64_I32:
+        return "cvtmax_f64_i32";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F64_I64:
+        return "cvtmax_f64_i64";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_I32:
+        return "cvtmax_i32";
+      case NamedVectorConstant::NAMED_VECTOR_CVTMAX_I64:
+        return "cvtmax_i64";
       default:
         return "<Unknown Named Vector Constant>";
     }
diff --git a/FEXCore/include/FEXCore/IR/IR.h b/FEXCore/include/FEXCore/IR/IR.h
index 005b41883c..635d62c3a4 100644
--- a/FEXCore/include/FEXCore/IR/IR.h
+++ b/FEXCore/include/FEXCore/IR/IR.h
@@ -71,6 +71,13 @@ enum NamedVectorConstant : uint8_t {
   NAMED_VECTOR_X87_LOG10_2,
   NAMED_VECTOR_X87_LOG_2,
 
+  NAMED_VECTOR_CVTMAX_F32_I32,
+  NAMED_VECTOR_CVTMAX_F32_I64,
+  NAMED_VECTOR_CVTMAX_F64_I32,
+  NAMED_VECTOR_CVTMAX_F64_I64,
+  NAMED_VECTOR_CVTMAX_I32,
+  NAMED_VECTOR_CVTMAX_I64,
+
   NAMED_VECTOR_CONST_POOL_MAX,
   // Beginning of named constants that don't have a constant pool backing.
   NAMED_VECTOR_ZERO = NAMED_VECTOR_CONST_POOL_MAX,
diff --git a/unittests/gcc-target-tests-64/Known_Failures b/unittests/gcc-target-tests-64/Known_Failures
index 46d1bc6c35..d7b0bdcd09 100644
--- a/unittests/gcc-target-tests-64/Known_Failures
+++ b/unittests/gcc-target-tests-64/Known_Failures
@@ -12,13 +12,3 @@ asm-5.c.gcc-target-test-64
 # Which turns the value in to 0xfffff2f5
 # This causes its comparison to fail
 sse2-mmx-pextrw.c.gcc-target-test-64
-
-# These tests fail because of things unrelated to the sse4.1 instructions
-sse4_1-ceil-sfix-vec.c.gcc-target-test-64
-sse4_1-ceilf-sfix-vec.c.gcc-target-test-64
-sse4_1-floor-sfix-vec.c.gcc-target-test-64
-sse4_1-floorf-sfix-vec.c.gcc-target-test-64
-sse4_1-rint-sfix-vec.c.gcc-target-test-64
-sse4_1-rintf-sfix-vec.c.gcc-target-test-64
-sse4_1-round-sfix-vec.c.gcc-target-test-64
-sse4_1-roundf-sfix-vec.c.gcc-target-test-64