stripped-down utf8proc added

2025-10-10 20:29:01 +00:00 · 2015-08-31 11:11:42 -10:00 · 2015-08-31 11:11:42 -10:00 · bca146dbfc
commit bca146dbfc
parent 9c44d5f4de
8 changed files with 360 additions and 2215 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -17,7 +17,7 @@ set(ATHENA_VERSION

 add_subdirectory(extern)

-include_directories(include ${LZO_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR} ${YAML_INCLUDE_DIR})
+include_directories(include ${LZO_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR})
 if (NOT MSVC)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 endif()
@ -69,6 +69,8 @@ add_library(AthenaCore
    include/gekko_support.h
    include/Athena/DNA.hpp
    include/Athena/DNAYaml.hpp
+    include/yaml.h
+    include/utf8proc.h
 )

 add_library(AthenaSakura
--- a/extern/yaml/CMakeLists.txt
+++ b/extern/yaml/CMakeLists.txt
@ -5,9 +5,7 @@ set(YAML_VERSION_STRING "${YAML_VERSION_MAJOR}.${YAML_VERSION_MINOR}.${YAML_VERS

 file(GLOB SRC src/*.c)

-include_directories(include win32)
-add_library(AthenaLibYaml STATIC ${SRC} include/yaml.h)
+include_directories(../../include)
+add_library(AthenaLibYaml STATIC ${SRC})

-set(YAML_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "YAML include path" FORCE)
-#install(DIRECTORY include/ DESTINATION include/Athena COMPONENT yaml)
 install(TARGETS AthenaLibYaml DESTINATION lib COMPONENT yaml)
--- a/extern/yaml/include/yaml.h
+++ b/extern/yaml/include/yaml.h
--- a/include/Athena/DNAYaml.hpp
+++ b/include/Athena/DNAYaml.hpp
@ -7,15 +7,9 @@
 * Any changes to the types or namespacing must be reflected in 'atdna/main.cpp'
 */

-#if _WIN32
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN 1
-#endif
-#include <windows.h>
-#endif
-
 #include <string.h>
 #include <yaml.h>
+#include <utf8proc.h>
 #include "DNA.hpp"

 namespace Athena
@ -381,44 +375,40 @@ inline std::unique_ptr<YAMLNode> ValToNode(const char* val)
 template <>
 inline std::wstring NodeToVal(const YAMLNode* node)
 {
+    // Decodes the node's UTF-8 scalar text into a wide string, one code point
+    // per iteration. Decoding stops at the first NUL byte. If a malformed
+    // sequence is met, a warning is logged and the text decoded so far is
+    // returned.
-#if _WIN32
-    int len = MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), nullptr, 0);
-    std::wstring retval(len, L'\0');
-    MultiByteToWideChar(CP_UTF8, 0, node->m_scalarString.c_str(), node->m_scalarString.size(), &retval[0], len);
-    return retval;
-#else
    std::wstring retval;
    retval.reserve(node->m_scalarString.length());
-    const char* buf = node->m_scalarString.c_str();
-    std::mbstate_t state = {};
+    const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(node->m_scalarString.c_str());
    while (*buf)
    {
-        wchar_t wc;
-        buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
-        retval += wc;
+        utf8proc_int32_t wc;
+        utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+        if (len < 0)
+        {
+            atWarning("invalid UTF-8 character while decoding");
+            return retval;
+        }
+        buf += len;
+        if (sizeof(wchar_t) == 2 && wc > 0xFFFF)
+        {
+            // A 16-bit wchar_t cannot hold a supplementary code point;
+            // store it as a UTF-16 surrogate pair instead of truncating.
+            const utf8proc_int32_t offset = wc - 0x10000;
+            retval += wchar_t(0xD800 + (offset >> 10));
+            retval += wchar_t(0xDC00 + (offset & 0x3FF));
+        }
+        else
+        {
+            retval += wchar_t(wc);
+        }
    }
    return retval;
-#endif
 }

 template <>
 inline std::unique_ptr<YAMLNode> ValToNode(const std::wstring& val)
 {
+    // Builds a scalar YAML node whose text is the UTF-8 encoding of `val`.
+    // If a character cannot be encoded, a warning is logged and the node
+    // holding the text encoded so far is returned.
    YAMLNode* ret = new YAMLNode(YAML_SCALAR_NODE);
-#if _WIN32
-    int len = WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), nullptr, 0, nullptr, nullptr);
-    ret->m_scalarString.assign(len, '\0');
-    WideCharToMultiByte(CP_UTF8, 0, val.c_str(), val.size(), &ret->m_scalarString[0], len, nullptr, nullptr);
-#else
    ret->m_scalarString.reserve(val.length());
-    std::mbstate_t state = {};
-    for (wchar_t ch : val)
+    for (size_t i = 0; i < val.size(); ++i)
    {
-        char mb[MB_LEN_MAX];
-        int c = std::wcrtomb(mb, ch, &state);
-        ret->m_scalarString.append(mb, c);
+        utf8proc_int32_t cp = utf8proc_int32_t(val[i]);
+        // With a 16-bit wchar_t, supplementary code points arrive as UTF-16
+        // surrogate pairs; merge each pair so it is written as one 4-byte
+        // UTF-8 sequence rather than two 3-byte sequences that
+        // utf8proc_iterate refuses to decode.
+        if (sizeof(wchar_t) == 2 && cp >= 0xD800 && cp <= 0xDBFF && i + 1 < val.size())
+        {
+            const utf8proc_int32_t low = utf8proc_int32_t(val[i + 1]);
+            if (low >= 0xDC00 && low <= 0xDFFF)
+            {
+                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
+                ++i;
+            }
+        }
+        utf8proc_uint8_t mb[4];
+        utf8proc_ssize_t c = utf8proc_encode_char(cp, mb);
+        // utf8proc_encode_char signals failure by returning 0, not a
+        // negative value, so test for <= 0.
+        if (c <= 0)
+        {
+            atWarning("invalid UTF-8 character while encoding");
+            return std::unique_ptr<YAMLNode>(ret);
+        }
+        ret->m_scalarString.append(reinterpret_cast<char*>(mb), c);
    }
-#endif
    return std::unique_ptr<YAMLNode>(ret);
 }

--- a/include/Athena/IStreamReader.hpp
+++ b/include/Athena/IStreamReader.hpp
@ -1,15 +1,9 @@
 #ifndef ISTREAMREADER_HPP
 #define ISTREAMREADER_HPP

-#if _WIN32
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN 1
-#endif
-#include <windows.h>
-#endif
-
 #include <memory>
 #include <functional>
+#include "utf8proc.h"
 #include "IStream.hpp"

 namespace Athena
@ -597,32 +591,10 @@ public:
     */
    inline std::string readWStringAsString(atInt32 fixedLen = -1)
    {
-#if _WIN32
-        std::wstring wstr;
-        atUint16 chr = readUint16();
-
-        atInt32 i;
-        for (i=0 ;; ++i)
-        {
-            if (fixedLen >= 0 && i >= fixedLen - 1)
-                break;
-
-            if (!chr)
-                break;
-
-            wstr += chr;
-            chr = readUint16();
-        }
-
-        int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
-        std::string retval(len, '\0');
-        WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
-#else
        std::string retval;
        atUint16 chr = readUint16();

        atInt32 i;
-        std::mbstate_t state = {};
        for (i=0 ;; ++i)
        {
            if (fixedLen >= 0 && i >= fixedLen - 1)
@ -631,12 +603,18 @@ public:
            if (!chr)
                break;

-            char mb[MB_LEN_MAX];
-            int c = std::wcrtomb(mb, chr, &state);
-            retval.append(mb, c);
+            utf8proc_uint8_t mb[4];
+            utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
+            if (c < 0)
+            {
+                atWarning("invalid UTF-8 character while encoding");
+                return retval;
+            }
+
+            retval.append(reinterpret_cast<char*>(mb), c);
            chr = readUint16();
        }
-#endif
+
        if (fixedLen >= 0 && i < fixedLen)
            seek(fixedLen - i);

@ -645,32 +623,10 @@ public:

    inline std::string readWStringAsStringLittle(atInt32 fixedLen = -1)
    {
-#if _WIN32
-        std::wstring wstr;
-        atUint16 chr = readUint16Little();
-
-        atInt32 i;
-        for (i=0 ;; ++i)
-        {
-            if (fixedLen >= 0 && i >= fixedLen - 1)
-                break;
-
-            if (!chr)
-                break;
-
-            wstr += chr;
-            chr = readUint16Little();
-        }
-
-        int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
-        std::string retval(len, '\0');
-        WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
-#else
        std::string retval;
        atUint16 chr = readUint16Little();

        atInt32 i;
-        std::mbstate_t state = {};
        for (i=0 ;; ++i)
        {
            if (fixedLen >= 0 && i >= fixedLen - 1)
@ -679,12 +635,18 @@ public:
            if (!chr)
                break;

-            char mb[MB_LEN_MAX];
-            int c = std::wcrtomb(mb, chr, &state);
-            retval.append(mb, c);
+            utf8proc_uint8_t mb[4];
+            utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
+            if (c < 0)
+            {
+                atWarning("invalid UTF-8 character while encoding");
+                return retval;
+            }
+
+            retval.append(reinterpret_cast<char*>(mb), c);
            chr = readUint16Little();
        }
-#endif
+
        if (fixedLen >= 0 && i < fixedLen)
            seek(fixedLen - i);

@ -693,32 +655,10 @@ public:

    inline std::string readWStringAsStringBig(atInt32 fixedLen = -1)
    {
-#if _WIN32
-        std::wstring wstr;
-        atUint16 chr = readUint16Big();
-
-        atInt32 i;
-        for (i=0 ;; ++i)
-        {
-            if (fixedLen >= 0 && i >= fixedLen - 1)
-                break;
-
-            if (!chr)
-                break;
-
-            wstr += chr;
-            chr = readUint16Big();
-        }
-
-        int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), nullptr, 0, nullptr, nullptr);
-        std::string retval(len, '\0');
-        WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.size(), &retval[0], len, nullptr, nullptr);
-#else
        std::string retval;
        atUint16 chr = readUint16Big();

        atInt32 i;
-        std::mbstate_t state = {};
        for (i = 0 ;; ++i)
        {
            if (fixedLen >= 0 && i >= fixedLen - 1)
@ -727,12 +667,18 @@ public:
            if (!chr)
                break;

-            char mb[MB_LEN_MAX];
-            int c = std::wcrtomb(mb, chr, &state);
-            retval.append(mb, c);
+            utf8proc_uint8_t mb[4];
+            utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(chr), mb);
+            if (c < 0)
+            {
+                atWarning("invalid UTF-8 character while encoding");
+                return retval;
+            }
+
+            retval.append(reinterpret_cast<char*>(mb), c);
            chr = readUint16Big();
        }
-#endif
+
        if (fixedLen >= 0 && i < fixedLen)
            seek(fixedLen - i);

--- a/include/Athena/IStreamWriter.hpp
+++ b/include/Athena/IStreamWriter.hpp
@ -1,13 +1,7 @@
 #ifndef ISTREAMWRITER_HPP
 #define ISTREAMWRITER_HPP

-#if _WIN32
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN 1
-#endif
-#include <windows.h>
-#endif
-
+#include "utf8proc.h"
 #include "IStream.hpp"

 namespace Athena
@ -444,44 +438,21 @@ public:
    inline void writeStringAsWString(const std::string& str, atInt32 fixedLen = -1)
    {
       std::string tmpStr = "\xEF\xBB\xBF" + str;
-
-#if _WIN32
-       int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
-       std::wstring retval(len, L'\0');
-       MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
-       if (fixedLen < 0)
-       {
-           for (wchar_t ch : retval)
-           {
-               if (ch != 0xFEFF)
-                   writeUint16(ch);
-           }
-           writeUint16(0);
-       }
-       else
-       {
-           for (atInt32 i=0 ; i<fixedLen ; ++i)
-           {
-               wchar_t wc = retval[i];
-               if (wc == 0xFEFF)
-               {
-                   --i;
-                   continue;
-               }
-               writeUint16(wc);
-           }
-       }
-#else
-       const char* buf = tmpStr.c_str();
-       std::mbstate_t state = {};
+       const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
       if (fixedLen < 0)
       {
           while (*buf)
           {
-               wchar_t wc;
-               buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+               utf8proc_int32_t wc;
+               utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+               if (len < 0)
+               {
+                   atWarning("invalid UTF-8 character while decoding");
+                   return;
+               }
+               buf += len;
               if (wc != 0xFEFF)
-                   writeUint16(wc);
+                   writeUint16(atUint16(wc));
           }
           writeUint16(0);
       }
@ -489,9 +460,17 @@ public:
       {
           for (atInt32 i=0 ; i<fixedLen ; ++i)
           {
-               wchar_t wc = 0;
+               utf8proc_int32_t wc = 0;
               if (*buf)
-                   buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+               {
+                   utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+                   if (len < 0)
+                   {
+                       atWarning("invalid UTF-8 character while decoding");
+                       return;
+                   }
+                   buf += len;
+               }

               if (wc == 0xFEFF)
               {
@ -499,53 +478,29 @@ public:
                   continue;
               }

-               writeUint16(wc);
+               writeUint16(atUint16(wc));
           }
       }
-#endif
    }

    inline void writeStringAsWStringLittle(const std::string& str, atInt32 fixedLen = -1)
    {
        std::string tmpStr = "\xEF\xBB\xBF" + str;
-
-#if _WIN32
-        int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
-        std::wstring retval(len, L'\0');
-        MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
-        if (fixedLen < 0)
-        {
-            for (wchar_t ch : retval)
-            {
-                if (ch != 0xFEFF)
-                    writeUint16(ch);
-            }
-            writeUint16Little(0);
-        }
-        else
-        {
-            for (atInt32 i = 0; i<fixedLen; ++i)
-            {
-                wchar_t wc = retval[i];
-                if (wc == 0xFEFF)
-                {
-                    --i;
-                    continue;
-                }
-                writeUint16Little(wc);
-            }
-        }
-#else
-        const char* buf = tmpStr.c_str();
-        std::mbstate_t state = {};
+        const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
        if (fixedLen < 0)
        {
            while (*buf)
            {
-                wchar_t wc;
-                buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+                utf8proc_int32_t wc;
+                utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+                if (len < 0)
+                {
+                    atWarning("invalid UTF-8 character while decoding");
+                    return;
+                }
+                buf += len;
                if (wc != 0xFEFF)
-                    writeUint16Little(wc);
+                    writeUint16Little(atUint16(wc));
            }
            writeUint16Little(0);
        }
@ -553,9 +508,17 @@ public:
        {
            for (atInt32 i=0 ; i<fixedLen ; ++i)
            {
-                wchar_t wc = 0;
+                utf8proc_int32_t wc = 0;
                if (*buf)
-                    buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+                {
+                    utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+                    if (len < 0)
+                    {
+                        atWarning("invalid UTF-8 character while decoding");
+                        return;
+                    }
+                    buf += len;
+                }

                if (wc == 0xFEFF)
                {
@ -563,53 +526,29 @@ public:
                    continue;
                }

-                writeUint16Little(wc);
+                writeUint16Little(atUint16(wc));
            }
        }
-#endif
    }

    inline void writeStringAsWStringBig(const std::string& str, atInt32 fixedLen = -1)
    {
        std::string tmpStr = "\xEF\xBB\xBF" + str;
-
-#if _WIN32
-        int len = MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), nullptr, 0);
-        std::wstring retval(len, L'\0');
-        MultiByteToWideChar(CP_UTF8, 0, tmpStr.c_str(), tmpStr.size(), &retval[0], len);
-        if (fixedLen < 0)
-        {
-            for (wchar_t ch : retval)
-            {
-                if (ch != 0xFEFF)
-                    writeUint16(ch);
-            }
-            writeUint16Big(0);
-        }
-        else
-        {
-            for (atInt32 i = 0; i<fixedLen; ++i)
-            {
-                wchar_t wc = retval[i];
-                if (wc == 0xFEFF)
-                {
-                    --i;
-                    continue;
-                }
-                writeUint16Big(wc);
-            }
-        }
-#else
-        const char* buf = tmpStr.c_str();
-        std::mbstate_t state = {};
+        const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(tmpStr.c_str());
        if (fixedLen < 0)
        {
            while (*buf)
            {
-                wchar_t wc;
-                buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+                utf8proc_int32_t wc;
+                utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+                if (len < 0)
+                {
+                    atWarning("invalid UTF-8 character while decoding");
+                    return;
+                }
+                buf += len;
                if (wc != 0xFEFF)
-                    writeUint16Big(wc);
+                    writeUint16Big(atUint16(wc));
            }
            writeUint16Big(0);
        }
@ -617,9 +556,17 @@ public:
        {
            for (atInt32 i=0 ; i<fixedLen ; ++i)
            {
-                wchar_t wc = 0;
+                utf8proc_int32_t wc = 0;
                if (*buf)
-                    buf += std::mbrtowc(&wc, buf, MB_LEN_MAX, &state);
+                {
+                    utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+                    if (len < 0)
+                    {
+                        atWarning("invalid UTF-8 character while decoding");
+                        return;
+                    }
+                    buf += len;
+                }

                if (wc == 0xFEFF)
                {
@ -627,10 +574,9 @@ public:
                    continue;
                }

-                writeUint16Big(wc);
+                writeUint16Big(atUint16(wc));
            }
        }
-#endif
    }

    /*! \brief Writes an string to the buffer and advances the buffer.
--- a/include/utf8proc.h
+++ b/include/utf8proc.h
@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/** 
+ * @mainpage
+ *
+ * utf8proc is a free/open-source (MIT/expat licensed) C library
+ * providing Unicode normalization, case-folding, and other operations
+ * for strings in the UTF-8 encoding, supporting Unicode version
+ * 7.0.0.  See the utf8proc home page (http://julialang.org/utf8proc/)
+ * for downloads and other information, or the source code on github
+ * (https://github.com/JuliaLang/utf8proc).
+ *
+ * For the utf8proc API documentation, see: @ref utf8proc.h
+ *
+ * The features of utf8proc include:
+ *
+ * - Transformation of strings (@ref utf8proc_map) to:
+ *    - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character)
+ *    - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
+ *    - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK)
+ *    - case-folding (@ref UTF8PROC_CASEFOLD)
+ * - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
+ * - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
+ * - Character-width computation: @ref utf8proc_charwidth
+ * - Classification of characters by Unicode category: @ref utf8proc_category and @ref utf8proc_category_string
+ * - Encode (@ref utf8proc_encode_char) and decode (@ref utf8proc_iterate) Unicode codepoints to/from UTF-8.
+ */
+
+/** @file */
+
+#ifndef UTF8PROC_H
+#define UTF8PROC_H
+
+/** @name API version
+ *  
+ * The utf8proc API version MAJOR.MINOR.PATCH, following
+ * semantic-versioning rules (http://semver.org) based on API
+ * compatibility.
+ *
+ * This is also returned at runtime by @ref utf8proc_version; however, the
+ * runtime version may append a string like "-dev" to the version number
+ * for prerelease versions.
+ *
+ * @note The shared-library version number in the Makefile may be different,
+ *       being based on ABI compatibility rather than API compatibility.
+ */
+/** @{ */
+/** The MAJOR version number (increased when backwards API compatibility is broken). */
+#define UTF8PROC_VERSION_MAJOR 1
+/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
+#define UTF8PROC_VERSION_MINOR 3
+/** The PATCH version (increased for fixes that do not change the API). */
+#define UTF8PROC_VERSION_PATCH 0
+/** @} */
+
+/*
+ * Integer, size and boolean typedefs used by the utf8proc declarations below.
+ * Under MSVC they are spelled out with built-in types; on other compilers
+ * they alias the <inttypes.h> / <stdbool.h> types.
+ */
+#include <stdlib.h>
+#include <sys/types.h>
+#ifdef _MSC_VER
+typedef signed char utf8proc_int8_t;
+typedef unsigned char utf8proc_uint8_t;
+typedef short utf8proc_int16_t;
+typedef unsigned short utf8proc_uint16_t;
+typedef int utf8proc_int32_t;
+typedef unsigned int utf8proc_uint32_t;
+/* Size types follow the pointer width of the MSVC target. */
+#  ifdef _WIN64
+typedef __int64 utf8proc_ssize_t;
+typedef unsigned __int64 utf8proc_size_t;
+#  else
+typedef int utf8proc_ssize_t;
+typedef unsigned int utf8proc_size_t;
+#  endif
+/* When compiled as C under MSVC, supply a bool type and false/true enumerators. */
+#  ifndef __cplusplus
+typedef unsigned char utf8proc_bool;
+enum {false, true};
+#  else
+typedef bool utf8proc_bool;
+#  endif
+#else
+#  include <stdbool.h>
+#  include <inttypes.h>
+typedef int8_t utf8proc_int8_t;
+typedef uint8_t utf8proc_uint8_t;
+typedef int16_t utf8proc_int16_t;
+typedef uint16_t utf8proc_uint16_t;
+typedef int32_t utf8proc_int32_t;
+typedef uint32_t utf8proc_uint32_t;
+typedef size_t utf8proc_size_t;
+typedef ssize_t utf8proc_ssize_t;
+typedef bool utf8proc_bool;
+#endif
+#include <limits.h>
+
+/** @name Error codes
+ * Error codes being returned by almost all functions.
+ */
+/** @{ */
+/** Memory could not be allocated. */
+#define UTF8PROC_ERROR_NOMEM -1
+/** The given string is too long to be processed. */
+#define UTF8PROC_ERROR_OVERFLOW -2
+/** The given string is not a legal UTF-8 string. */
+#define UTF8PROC_ERROR_INVALIDUTF8 -3
+/** The @ref UTF8PROC_REJECTNA flag was set and an unassigned codepoint was found. */
+#define UTF8PROC_ERROR_NOTASSIGNED -4
+/** Invalid options have been used. */
+#define UTF8PROC_ERROR_INVALIDOPTS -5
+/** @} */
+
+/** True when byte `ch` has the UTF-8 continuation-byte pattern 10xxxxxx. */
+#define UTF8PROC_cont(ch)  (((ch) & 0xc0) == 0x80)
+
+/**
+ * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
+ * The maximum number of bytes read is `strlen`, unless `strlen` is
+ * negative (in which case up to 4 bytes are read).
+ *
+ * If a valid codepoint could be read, it is stored in the variable
+ * pointed to by `dst`, otherwise that variable will be set to -1.
+ * In case of success, the number of bytes read is returned; otherwise, a
+ * negative error code (UTF8PROC_ERROR_INVALIDUTF8) is returned.
+ * When `strlen` is 0 nothing is read, `*dst` is -1 and 0 is returned.
+ */
+static inline utf8proc_ssize_t utf8proc_iterate(
+  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
+) {
+  utf8proc_uint32_t uc;
+  const utf8proc_uint8_t *end;
+
+  *dst = -1;
+  if (!strlen) return 0;
+  end = str + ((strlen < 0) ? 4 : strlen);
+  uc = *str++;
+  if (uc < 0x80) {
+    *dst = uc;
+    return 1;
+  }
+  // Must be between 0xc2 and 0xf4 inclusive to be valid
+  // (unsigned arithmetic: smaller lead bytes wrap around and are rejected too)
+  if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
+  if (uc < 0xe0) {         // 2-byte sequence
+     // Must have a valid continuation character that lies inside the buffer;
+     // without the bounds test a lead byte at the very end of the input
+     // (strlen == 1) would read one byte past it.
+     if ((str >= end) || !UTF8PROC_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
+     *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
+     return 2;
+  }
+  if (uc < 0xf0) {        // 3-byte sequence
+     if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
+        return UTF8PROC_ERROR_INVALIDUTF8;
+     // Check for surrogate chars
+     if (uc == 0xed && *str > 0x9f)
+         return UTF8PROC_ERROR_INVALIDUTF8;
+     uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
+     // Reject overlong encodings of values that fit in fewer bytes
+     if (uc < 0x800)
+         return UTF8PROC_ERROR_INVALIDUTF8;
+     *dst = uc;
+     return 3;
+  }
+  // 4-byte sequence
+  // Must have 3 valid continuation characters
+  if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
+     return UTF8PROC_ERROR_INVALIDUTF8;
+  // Make sure in correct range (0x10000 - 0x10ffff)
+  if (uc == 0xf0) {
+    if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
+  } else if (uc == 0xf4) {
+    if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
+  }
+  *dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
+  return 4;
+}
+
+/**
+ * Writes the UTF-8 encoding of codepoint `uc` into the byte array `dst`,
+ * which must provide room for at least 4 bytes.
+ *
+ * Returns the number of bytes written (1 to 4). Returns 0, writing
+ * nothing, when `uc` is negative or greater than 0x10FFFF.
+ *
+ * No further validity check is made on `uc`; in particular the surrogate
+ * range 0xD800-0xDFFF is encoded as an ordinary 3-byte sequence.
+ */
+static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
+  // Out-of-range values are rejected up front.
+  if (uc < 0x00 || uc >= 0x110000)
+    return 0;
+
+  if (uc < 0x80) {
+    dst[0] = uc;
+    return 1;
+  }
+
+  if (uc < 0x800) {
+    dst[0] = 0xC0 + (uc >> 6);
+    dst[1] = 0x80 + (uc & 0x3F);
+    return 2;
+  }
+
+  // Note: 0xd800-0xdfff are deliberately accepted here to keep the API
+  // unchanged, even though they are invalid in UTF-8.
+  if (uc < 0x10000) {
+    dst[0] = 0xE0 + (uc >> 12);
+    dst[1] = 0x80 + ((uc >> 6) & 0x3F);
+    dst[2] = 0x80 + (uc & 0x3F);
+    return 3;
+  }
+
+  dst[0] = 0xF0 + (uc >> 18);
+  dst[1] = 0x80 + ((uc >> 12) & 0x3F);
+  dst[2] = 0x80 + ((uc >> 6) & 0x3F);
+  dst[3] = 0x80 + (uc & 0x3F);
+  return 4;
+}
+
+#endif
+
--- a/src/Athena/SkywardSwordQuest.cpp
+++ b/src/Athena/SkywardSwordQuest.cpp
@ -16,6 +16,7 @@

 #include "Athena/SkywardSwordQuest.hpp"
 #include "Athena/Checksums.hpp"
+#include "utf8proc.h"
 #include <sstream>

 namespace Athena
@ -72,7 +73,7 @@ void SkywardSwordQuest::setPlayerName(const std::string& name)
    if (name.length() > 8)
        atDebug("WARNING: name cannot be greater than 8 characters, automatically truncating");

-    const char* buf = name.c_str();
+    const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(name.c_str());
    for (atUint32 i = 0; i < 8; i++)
    {
        atUint16& c = *(atUint16*)(m_data.get() + priv::NAME_OFFSET + (i * 2));
@ -83,9 +84,15 @@ void SkywardSwordQuest::setPlayerName(const std::string& name)
            continue;
        }

-        wchar_t wc;
-        buf += std::mbtowc(&wc, buf, MB_CUR_MAX);
-        c = wc;
+        utf8proc_int32_t wc;
+        utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
+        if (len < 0)
+        {
+            atError("invalid UTF-8 string");
+            return;
+        }
+        buf += len;
+        c = atUint16(wc);
        utility::BigUint16(c);
    }
 }
@ -102,9 +109,10 @@ std::string SkywardSwordQuest::playerName() const
            break;

        utility::BigUint16(c);
-        char mb[4];
-        int cs = std::wctomb(mb, c);
-        val.append(mb, cs);
+        utf8proc_uint8_t mb[4];
+        utf8proc_ssize_t cs = utf8proc_encode_char(utf8proc_int32_t(c), mb);
+        if (cs)
+            val.append(reinterpret_cast<char*>(mb), cs);
    }

    return val;