From 712e0d1f06bf413213bc4a9d3be2ac8d9a34a9f9 Mon Sep 17 00:00:00 2001 From: "Henry G. Stratmann III" <82091+hgs3@users.noreply.github.com> Date: Sat, 7 Aug 2021 16:23:15 -0500 Subject: [PATCH] Fixing WM_CHAR event handling for Unicode characters outside the Basic Multilingual Plane. --- src/video/windows/SDL_windowsevents.c | 36 +++++++++++++++++++++++---- src/video/windows/SDL_windowswindow.c | 1 + src/video/windows/SDL_windowswindow.h | 1 + 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c index 1481b0314..960443dbe 100644 --- a/src/video/windows/SDL_windowsevents.c +++ b/src/video/windows/SDL_windowsevents.c @@ -862,15 +862,41 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) case WM_UNICHAR: if (wParam == UNICODE_NOCHAR) { returnCode = 1; - break; - } - /* otherwise fall through to below */ - case WM_CHAR: - { + } else { char text[5]; if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) { SDL_SendKeyboardText(text); } + returnCode = 0; + } + break; + + case WM_CHAR: + /* When a user enters a Unicode code point defined in the Basic Multilingual Plane, Windows sends a WM_CHAR + message with the code point encoded as UTF-16. When a user enters a Unicode code point from a Supplementary + Plane, Windows sends the code point in two separate WM_CHAR messages: The first message includes the UTF-16 + High Surrogate and the second the UTF-16 Low Surrogate. The High and Low Surrogates cannot be individually + converted to valid UTF-8, therefore, we must save the High Surrogate from the first WM_CHAR message and + concatenate it with the Low Surrogate from the second WM_CHAR message. At that point, we have a valid + UTF-16 surrogate pair ready to re-encode as UTF-8. */ + if (IS_HIGH_SURROGATE(wParam)) { + data->high_surrogate = (WCHAR)wParam; + } else if (IS_SURROGATE_PAIR(data->high_surrogate, wParam)) { + /* The code point is in a Supplementary Plane. + Here wParam is the Low Surrogate. */ + const WCHAR surrogate_pair[] = {data->high_surrogate, (WCHAR)wParam, 0}; + char *s; + s = SDL_iconv_string("UTF-8", "UTF-16LE", (const char *)surrogate_pair, sizeof(surrogate_pair)); + SDL_SendKeyboardText(s); + SDL_free(s); + data->high_surrogate = 0; + } else { + /* The code point is in the Basic Multilingual Plane */ + const WCHAR wchar[] = {(WCHAR)wParam, 0}; + char *s; + s = SDL_iconv_string("UTF-8", "UTF-16LE", (const char *)wchar, sizeof(wchar)); + SDL_SendKeyboardText(s); + SDL_free(s); } returnCode = 0; break; diff --git a/src/video/windows/SDL_windowswindow.c b/src/video/windows/SDL_windowswindow.c index 93d1903a7..75fe64cac 100644 --- a/src/video/windows/SDL_windowswindow.c +++ b/src/video/windows/SDL_windowswindow.c @@ -186,6 +186,7 @@ SetupWindowData(_THIS, SDL_Window * window, HWND hwnd, HWND parent, SDL_bool cre data->hdc = GetDC(hwnd); data->hinstance = (HINSTANCE) GetWindowLongPtr(hwnd, GWLP_HINSTANCE); data->created = created; + data->high_surrogate = 0; data->mouse_button_flags = 0; data->last_pointer_update = (LPARAM)-1; data->videodata = videodata; diff --git a/src/video/windows/SDL_windowswindow.h b/src/video/windows/SDL_windowswindow.h index 700566e9c..e64096cda 100644 --- a/src/video/windows/SDL_windowswindow.h +++ b/src/video/windows/SDL_windowswindow.h @@ -41,6 +41,7 @@ typedef struct SDL_bool created; WPARAM mouse_button_flags; LPARAM last_pointer_update; + WCHAR high_surrogate; SDL_bool initializing; SDL_bool expected_resize; SDL_bool in_border_change;