[OMPT-svn-commit] OpenMPT r25340 - trunk/OpenMPT/src/openmpt/soundbase

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Author: manx
Date: Wed May 20 18:42:18 2026
New Revision: 25340
URL: https://source.openmpt.org/browse/openmpt/?op=revision&rev=25340

Log:
[Ref] openmpt/soundbase/SampleConvert.hpp: Use mpt::lround and mpt::llround where appropriate, instead of calling mpt::round and casting the result to an integer. This gives the compiler more freedom. It also disables the fastround() path in one more case where it is no longer faster.

Modified:
   trunk/OpenMPT/src/openmpt/soundbase/SampleConvert.hpp

Modified: trunk/OpenMPT/src/openmpt/soundbase/SampleConvert.hpp
==============================================================================

--- trunk/OpenMPT/src/openmpt/soundbase/SampleConvert.hpp	Wed May 20 18:40:16 2026	(r25339)
+++ trunk/OpenMPT/src/openmpt/soundbase/SampleConvert.hpp	Wed May 20 18:42:18 2026	(r25340)
@@ -49,11 +49,23 @@
 // amd64 SSE4.2 precise inline+fixup inline       +
 // amd64 AVX2   precise inline+fixup inline       +
 //
-// See <https://godbolt.org/z/dYYGenjKT> for a microbenchmark.
+// std::round: <https://godbolt.org/z/dYYGenjKT>
+// 
+// arch  SSE    /fp:    std::lround fastlround   better
 //
-// TODO: Real-world benchmark of amd64 SSE4.2 and amd64 AVX2.
-// The inlined additional code might fold away, or nearly away,
-// in which case the canonical semantically correct std::round should be preferred.
+// i386  x87    fast    2*call      fixup+2*call -
+// i386  SSE2   fast    inline      inline+fixup -
+// amd64 SSE2   fast    call        inline       +
+// amd64 SSE4.2 fast    call        inline       +
+// amd64 AVX2   fast    call        inline       +
+//
+// i386  x87    precise call        fixup+2*call -
+// i386  SSE2   precise call        fixup+call   -
+// amd64 SSE2   precise call        fixup+call   -
+// amd64 SSE4.2 precise call        inline       +
+// amd64 AVX2   precise call        inline       +
+//
+// std::lround: <https://godbolt.org/z/rcbcbK7b7>
 
 #if MPT_COMPILER_MSVC
 #if defined(_M_FP_FAST) && (_M_FP_FAST == 1)
@@ -62,7 +74,7 @@
 #elif MPT_ARCH_X86 && (defined(MPT_ARCH_X86_FPU) && !defined(MPT_ARCH_X86_SSE2))
 #define MPT_SOUNDBASE_SAMPLECONVERT_HPP_FASTROUND 0
 #elif MPT_ARCH_X86 && (defined(MPT_ARCH_X86_SSE2))
-#define MPT_SOUNDBASE_SAMPLECONVERT_HPP_FASTROUND 1
+#define MPT_SOUNDBASE_SAMPLECONVERT_HPP_FASTROUND 0
 #elif MPT_ARCH_AMD64 && (defined(MPT_ARCH_X86_SSE2) && !defined(MPT_ARCH_X86_SSE4_2))
 #define MPT_SOUNDBASE_SAMPLECONVERT_HPP_FASTROUND 1
 #elif MPT_ARCH_AMD64 && (defined(MPT_ARCH_X86_SSE4_2) && !defined(MPT_ARCH_X86_AVX2))
@@ -98,6 +110,18 @@
 	static_assert(std::is_floating_point<Tfloat>::value);
 	return std::floor(x + static_cast<Tfloat>(0.5));
 }
+template <typename Tfloat>
+MPT_ATTR_ALWAYSINLINE MPT_INLINE_FORCE long fastlround(Tfloat x)
+{
+	static_assert(std::is_floating_point<Tfloat>::value);
+	return static_cast<long>(std::floor(x + static_cast<Tfloat>(0.5)));
+}
+template <typename Tfloat>
+MPT_ATTR_ALWAYSINLINE MPT_INLINE_FORCE long long fastllround(Tfloat x)
+{
+	static_assert(std::is_floating_point<Tfloat>::value);
+	return static_cast<long long>(std::floor(x + static_cast<Tfloat>(0.5)));
+}
 #else
 template <typename Tfloat>
 MPT_ATTR_ALWAYSINLINE MPT_INLINE_FORCE Tfloat fastround(Tfloat x)
@@ -105,6 +129,18 @@
 	static_assert(std::is_floating_point<Tfloat>::value);
 	return mpt::round(x);
 }
+template <typename Tfloat>
+MPT_ATTR_ALWAYSINLINE MPT_INLINE_FORCE long fastlround(Tfloat x)
+{
+	static_assert(std::is_floating_point<Tfloat>::value);
+	return mpt::lround(x);
+}
+template <typename Tfloat>
+MPT_ATTR_ALWAYSINLINE MPT_INLINE_FORCE long long fastllround(Tfloat x)
+{
+	static_assert(std::is_floating_point<Tfloat>::value);
+	return mpt::llround(x);
+}
 #endif
 
 
@@ -221,7 +257,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0f, 1.0f);
 		val *= 128.0f;
-		return static_cast<uint8>(mpt::saturate_cast<int8>(static_cast<int>(SC::fastround(val))) + 0x80);
+		return static_cast<uint8>(mpt::saturate_cast<int8>(SC::fastlround(val)) + 0x80);
 	}
 };
 
@@ -234,7 +270,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0, 1.0);
 		val *= 128.0;
-		return static_cast<uint8>(mpt::saturate_cast<int8>(static_cast<int>(SC::fastround(val))) + 0x80);
+		return static_cast<uint8>(mpt::saturate_cast<int8>(SC::fastlround(val)) + 0x80);
 	}
 };
 
@@ -302,7 +338,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0f, 1.0f);
 		val *= 128.0f;
-		return mpt::saturate_cast<int8>(static_cast<int>(SC::fastround(val)));
+		return mpt::saturate_cast<int8>(SC::fastlround(val));
 	}
 };
 
@@ -315,7 +351,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0, 1.0);
 		val *= 128.0;
-		return mpt::saturate_cast<int8>(static_cast<int>(SC::fastround(val)));
+		return mpt::saturate_cast<int8>(SC::fastlround(val));
 	}
 };
 
@@ -383,7 +419,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0f, 1.0f);
 		val *= 32768.0f;
-		return mpt::saturate_cast<int16>(static_cast<int>(SC::fastround(val)));
+		return mpt::saturate_cast<int16>(SC::fastlround(val));
 	}
 };
 
@@ -396,7 +432,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0, 1.0);
 		val *= 32768.0;
-		return mpt::saturate_cast<int16>(static_cast<int>(SC::fastround(val)));
+		return mpt::saturate_cast<int16>(SC::fastlround(val));
 	}
 };
 
@@ -464,7 +500,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0f, 1.0f);
 		val *= 2147483648.0f;
-		return static_cast<int24>(mpt::rshift_signed(mpt::saturate_cast<int32>(static_cast<int64>(SC::fastround(val))), 8));
+		return static_cast<int24>(mpt::rshift_signed(mpt::saturate_cast<int32>(SC::fastllround(val)), 8));
 	}
 };
 
@@ -477,7 +513,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0, 1.0);
 		val *= 2147483648.0;
-		return static_cast<int24>(mpt::rshift_signed(mpt::saturate_cast<int32>(static_cast<int64>(SC::fastround(val))), 8));
+		return static_cast<int24>(mpt::rshift_signed(mpt::saturate_cast<int32>(SC::fastllround(val)), 8));
 	}
 };
 
@@ -545,7 +581,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0f, 1.0f);
 		val *= 2147483648.0f;
-		return mpt::saturate_cast<int32>(static_cast<int64>(SC::fastround(val)));
+		return mpt::saturate_cast<int32>(SC::fastllround(val));
 	}
 };
 
@@ -558,7 +594,7 @@
 	{
 		val = mpt::safe_clamp(val, -1.0, 1.0);
 		val *= 2147483648.0;
-		return mpt::saturate_cast<int32>(static_cast<int64>(SC::fastround(val)));
+		return mpt::saturate_cast<int32>(SC::fastllround(val));
 	}
 };
 




[OMPT-svn-commit] OpenMPT r25340 - trunk/OpenMPT/src/openmpt/soundbase

Music tracker software for Windows.

[OMPT-svn-commit] OpenMPT r25340 - trunk/OpenMPT/src/openmpt/soundbase