From b6c1563272eef9cb323e1970a8050e70b0954c14 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Mon, 31 Aug 2015 11:23:45 -1000 Subject: [PATCH] utf8 fix --- hecl/.gitignore | 1 + hecl/extern/Athena | 2 +- hecl/lib/WideStringConvert.cpp | 42 +++++++++++++++------------------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/hecl/.gitignore b/hecl/.gitignore index 3de5fc0e2..95467dc68 100644 --- a/hecl/.gitignore +++ b/hecl/.gitignore @@ -1,2 +1,3 @@ DataSpecRegistry.hpp +blender/hecl.zip diff --git a/hecl/extern/Athena b/hecl/extern/Athena index fd55c9298..bca146dbf 160000 --- a/hecl/extern/Athena +++ b/hecl/extern/Athena @@ -1 +1 @@ -Subproject commit fd55c9298330d0809b5d2c7dc4277f87c6d29c19 +Subproject commit bca146dbfce090d2c12773e26f4f097a3843d9c1 diff --git a/hecl/lib/WideStringConvert.cpp b/hecl/lib/WideStringConvert.cpp index d21c92568..4cd3c4f20 100644 --- a/hecl/lib/WideStringConvert.cpp +++ b/hecl/lib/WideStringConvert.cpp @@ -1,3 +1,4 @@ +#include <utf8proc.h> #include "HECL/HECL.hpp" namespace HECL @@ -5,45 +6,40 @@ namespace HECL std::string WideToUTF8(const std::wstring& src) { -#if _WIN32 - int len = WideCharToMultiByte(CP_UTF8, 0, src.c_str(), src.size(), nullptr, 0, nullptr, nullptr); - std::string retval(len, '\0'); - WideCharToMultiByte(CP_UTF8, 0, src.c_str(), src.size(), &retval[0], len, nullptr, nullptr); - return retval; -#else std::string retval; retval.reserve(src.length()); - std::mbstate_t state = {}; for (wchar_t ch : src) { - char mb[MB_LEN_MAX]; - int c = std::wcrtomb(mb, ch, &state); - retval.append(mb, c); + utf8proc_uint8_t mb[4]; + utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb); + if (c < 0) + { + LogModule.report(LogVisor::Warning, "invalid UTF-8 character while encoding"); + return retval; + } + retval.append(reinterpret_cast<const char*>(mb), c); } return retval; -#endif } std::wstring UTF8ToWide(const std::string& src) { -#if _WIN32 - int len = MultiByteToWideChar(CP_UTF8, 0, src.c_str(), src.size(), nullptr, 0); - 
std::wstring retval(len, L'\0'); - MultiByteToWideChar(CP_UTF8, 0, src.c_str(), src.size(), &retval[0], len); - return retval; -#else std::wstring retval; retval.reserve(src.length()); - const char* buf = src.c_str(); - std::mbstate_t state = {}; + const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.c_str()); while (*buf) { - wchar_t wc; - buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state); - retval += wc; + utf8proc_int32_t wc; + utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc); + if (len < 0) + { + LogModule.report(LogVisor::Warning, "invalid UTF-8 character while decoding"); + return retval; + } + buf += len; + retval += wchar_t(wc); } return retval; -#endif } }