diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7982b9134..5beaf3b38 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -649,6 +649,11 @@ if(MSVC)
target_compile_definitions(sdl-build-options INTERFACE "-D_CRT_SECURE_NO_WARNINGS")
endif()
+if(MSVC)
+ # Due to a limitation of Microsoft's LTO implementation, LTO must be disabled for memcpy and memset (COMPILE_FLAGS is one string, so extend it with APPEND_STRING and a leading space rather than list-style APPEND).
+ set_property(SOURCE src/stdlib/SDL_memcpy.c src/stdlib/SDL_memset.c APPEND_STRING PROPERTY COMPILE_FLAGS " /GL-")
+endif()
+
if(SDL_ASSEMBLY)
if(USE_GCC OR USE_CLANG)
# TODO: Those all seem to be quite GCC specific - needs to be
diff --git a/Makefile.os2 b/Makefile.os2
index 14b4953d4..0b1b13a1c 100644
--- a/Makefile.os2
+++ b/Makefile.os2
@@ -68,7 +68,7 @@ CFLAGS_DLL+= -DSDL_BUILD_MINOR_VERSION=$(MINOR_VERSION)
CFLAGS_DLL+= -DSDL_BUILD_MICRO_VERSION=$(MICRO_VERSION)
SRCS = SDL.c SDL_assert.c SDL_error.c SDL_guid.c SDL_log.c SDL_dataqueue.c SDL_hints.c SDL_list.c
-SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
+SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_memcpy.c SDL_memset.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
SRCS+= SDL_cpuinfo.c SDL_atomic.c SDL_spinlock.c SDL_thread.c SDL_timer.c
SRCS+= SDL_rwops.c SDL_power.c
SRCS+= SDL_audio.c SDL_audiocvt.c SDL_audiodev.c SDL_audiotypecvt.c SDL_mixer.c SDL_wave.c
diff --git a/Makefile.w32 b/Makefile.w32
index fe1989004..2a186928d 100644
--- a/Makefile.w32
+++ b/Makefile.w32
@@ -44,7 +44,7 @@ CFLAGS_DLL+= -DSDL_BUILD_MICRO_VERSION=$(MICRO_VERSION)
RCFLAGS = -q -r -bt=nt $(INCPATH)
SRCS = SDL.c SDL_assert.c SDL_error.c SDL_guid.c SDL_log.c SDL_dataqueue.c SDL_hints.c SDL_list.c
-SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
+SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_memcpy.c SDL_memset.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
SRCS+= SDL_cpuinfo.c SDL_atomic.c SDL_spinlock.c SDL_thread.c SDL_timer.c
SRCS+= SDL_rwops.c SDL_power.c
SRCS+= SDL_audio.c SDL_audiocvt.c SDL_audiodev.c SDL_audiotypecvt.c SDL_mixer.c SDL_wave.c
diff --git a/VisualC-WinRT/SDL-UWP.vcxproj b/VisualC-WinRT/SDL-UWP.vcxproj
index 80c837972..a4d77445b 100644
--- a/VisualC-WinRT/SDL-UWP.vcxproj
+++ b/VisualC-WinRT/SDL-UWP.vcxproj
@@ -311,6 +311,8 @@
+
+
diff --git a/VisualC/SDL/SDL.vcxproj b/VisualC/SDL/SDL.vcxproj
index 04ed6b46f..013ca0202 100644
--- a/VisualC/SDL/SDL.vcxproj
+++ b/VisualC/SDL/SDL.vcxproj
@@ -562,6 +562,8 @@
+
+
diff --git a/src/stdlib/SDL_memcpy.c b/src/stdlib/SDL_memcpy.c
new file mode 100644
index 000000000..bedbce8f5
--- /dev/null
+++ b/src/stdlib/SDL_memcpy.c
@@ -0,0 +1,79 @@
+/*
+ Simple DirectMedia Layer
+ Copyright (C) 1997-2022 Sam Lantinga
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "../SDL_internal.h"
+
+/* Portable SDL_memcpy: copies len bytes from src to dst and returns dst. */
+
+void *
+SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
+{
+#ifdef __GNUC__
+ /* Use the compiler builtin. Presumably this is well tuned for speed:
+ on the original author's machine it is twice as fast as the C code below.
+ */
+ return __builtin_memcpy(dst, src, len);
+#elif defined(HAVE_MEMCPY)
+ return memcpy(dst, src, len); /* defer to the C library's memcpy */
+#elif defined(HAVE_BCOPY)
+ bcopy(src, dst, len); /* note the argument order: source first, then destination */
+ return dst;
+#else
+ /* Hand-written fallback. GCC 4.9.0 with -O3 will generate movaps
+ instructions with the loop using Uint32* pointers, so we need to make
+ sure the pointers are aligned before we loop using them.
+ */
+ if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) { /* either pointer is not 4-byte aligned */
+ /* Do an unaligned copy, one byte at a time */
+ Uint8 *srcp1 = (Uint8 *)src;
+ Uint8 *dstp1 = (Uint8 *)dst;
+
+ while (len--) {
+ *dstp1++ = *srcp1++;
+ }
+ } else { /* both pointers are 4-byte aligned */
+ size_t left = (len % 4); /* bytes remaining after the 32-bit word copies */
+ Uint32 *srcp4, *dstp4;
+ Uint8 *srcp1, *dstp1;
+
+ srcp4 = (Uint32 *) src;
+ dstp4 = (Uint32 *) dst;
+ len /= 4; /* from here on len counts whole 32-bit words */
+ while (len--) {
+ *dstp4++ = *srcp4++;
+ }
+
+ srcp1 = (Uint8 *) srcp4; /* continue byte-wise where the word copy stopped */
+ dstp1 = (Uint8 *) dstp4;
+ switch (left) { /* copy the 0-3 trailing bytes; each case falls through on purpose */
+ case 3:
+ *dstp1++ = *srcp1++; /* fallthrough */
+ case 2:
+ *dstp1++ = *srcp1++; /* fallthrough */
+ case 1:
+ *dstp1++ = *srcp1++;
+ }
+ }
+ return dst;
+#endif /* __GNUC__ */
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/stdlib/SDL_memset.c b/src/stdlib/SDL_memset.c
new file mode 100644
index 000000000..c2a487591
--- /dev/null
+++ b/src/stdlib/SDL_memset.c
@@ -0,0 +1,75 @@
+/*
+ Simple DirectMedia Layer
+ Copyright (C) 1997-2022 Sam Lantinga
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "../SDL_internal.h"
+
+/* Portable SDL_memset: fills len bytes at dst with the byte value c and returns dst. */
+
+void *
+SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
+{
+#if defined(HAVE_MEMSET)
+ return memset(dst, c, len); /* defer to the C library's memset */
+#else
+ size_t left; /* bytes remaining after the 32-bit word stores */
+ Uint32 *dstp4; /* cursor for the 32-bit word stores */
+ Uint8 *dstp1 = (Uint8 *) dst; /* cursor for the single-byte stores */
+ Uint8 value1; /* the fill byte */
+ Uint32 value4; /* the fill byte repeated in all four byte positions */
+
+ /* The value used in memset() is a byte, passed as an int */
+ c &= 0xff;
+
+ /* The destination pointer needs to be aligned on a 4-byte boundary to
+ * execute a 32-bit set. Set first bytes manually if needed until it is
+ * aligned. */
+ value1 = (Uint8)c;
+ while ((uintptr_t)dstp1 & 0x3) {
+ if (len--) { /* bytes remain: set one and advance */
+ *dstp1++ = value1;
+ } else { /* ran out of bytes before reaching an aligned address */
+ return dst;
+ }
+ }
+
+ value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
+ dstp4 = (Uint32 *) dstp1;
+ left = (len % 4);
+ len /= 4; /* from here on len counts whole 32-bit words */
+ while (len--) {
+ *dstp4++ = value4;
+ }
+
+ dstp1 = (Uint8 *) dstp4; /* continue byte-wise where the word stores stopped */
+ switch (left) { /* set the 0-3 trailing bytes; each case falls through on purpose */
+ case 3:
+ *dstp1++ = value1; /* fallthrough */
+ case 2:
+ *dstp1++ = value1; /* fallthrough */
+ case 1:
+ *dstp1++ = value1;
+ }
+
+ return dst;
+#endif /* HAVE_MEMSET */
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 77e0d30f5..943e72740 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -263,108 +263,6 @@ SDL_ScanFloat(const char *text, double *valuep)
}
#endif
-void *
-SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
-{
-#if defined(HAVE_MEMSET)
- return memset(dst, c, len);
-#else
- size_t left;
- Uint32 *dstp4;
- Uint8 *dstp1 = (Uint8 *) dst;
- Uint8 value1;
- Uint32 value4;
-
- /* The value used in memset() is a byte, passed as an int */
- c &= 0xff;
-
- /* The destination pointer needs to be aligned on a 4-byte boundary to
- * execute a 32-bit set. Set first bytes manually if needed until it is
- * aligned. */
- value1 = (Uint8)c;
- while ((uintptr_t)dstp1 & 0x3) {
- if (len--) {
- *dstp1++ = value1;
- } else {
- return dst;
- }
- }
-
- value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
- dstp4 = (Uint32 *) dstp1;
- left = (len % 4);
- len /= 4;
- while (len--) {
- *dstp4++ = value4;
- }
-
- dstp1 = (Uint8 *) dstp4;
- switch (left) {
- case 3:
- *dstp1++ = value1;
- case 2:
- *dstp1++ = value1;
- case 1:
- *dstp1++ = value1;
- }
-
- return dst;
-#endif /* HAVE_MEMSET */
-}
-
-void *
-SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
-{
-#ifdef __GNUC__
- /* Presumably this is well tuned for speed.
- On my machine this is twice as fast as the C code below.
- */
- return __builtin_memcpy(dst, src, len);
-#elif defined(HAVE_MEMCPY)
- return memcpy(dst, src, len);
-#elif defined(HAVE_BCOPY)
- bcopy(src, dst, len);
- return dst;
-#else
- /* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
- using Uint32* pointers, so we need to make sure the pointers are
- aligned before we loop using them.
- */
- if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
- /* Do an unaligned byte copy */
- Uint8 *srcp1 = (Uint8 *)src;
- Uint8 *dstp1 = (Uint8 *)dst;
-
- while (len--) {
- *dstp1++ = *srcp1++;
- }
- } else {
- size_t left = (len % 4);
- Uint32 *srcp4, *dstp4;
- Uint8 *srcp1, *dstp1;
-
- srcp4 = (Uint32 *) src;
- dstp4 = (Uint32 *) dst;
- len /= 4;
- while (len--) {
- *dstp4++ = *srcp4++;
- }
-
- srcp1 = (Uint8 *) srcp4;
- dstp1 = (Uint8 *) dstp4;
- switch (left) {
- case 3:
- *dstp1++ = *srcp1++;
- case 2:
- *dstp1++ = *srcp1++;
- case 1:
- *dstp1++ = *srcp1++;
- }
- }
- return dst;
-#endif /* __GNUC__ */
-}
-
void *
SDL_memmove(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
{