2015-08-31 21:23:45 +00:00
|
|
|
#include <utf8proc.h>
|
2016-03-04 23:02:44 +00:00
|
|
|
#include "hecl/hecl.hpp"
|
2015-06-09 22:19:59 +00:00
|
|
|
|
2016-03-04 23:02:44 +00:00
|
|
|
namespace hecl
|
2015-06-09 22:19:59 +00:00
|
|
|
{
|
2018-01-10 06:16:18 +00:00
|
|
|
/* File-local logvisor module ("hecl-wsconv") through which the conversion
 * routines below report encoding/decoding warnings. */
static logvisor::Module Log("hecl-wsconv");
|
2015-06-09 22:19:59 +00:00
|
|
|
|
2017-11-13 06:13:53 +00:00
|
|
|
std::string WideToUTF8(std::wstring_view src)
|
2015-06-09 22:19:59 +00:00
|
|
|
{
|
2015-08-25 07:02:10 +00:00
|
|
|
std::string retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
for (wchar_t ch : src)
|
|
|
|
{
|
2015-08-31 21:23:45 +00:00
|
|
|
utf8proc_uint8_t mb[4];
|
|
|
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
|
|
|
if (c < 0)
|
|
|
|
{
|
2018-01-10 06:16:18 +00:00
|
|
|
Log.report(logvisor::Warning, "invalid UTF-8 character while encoding");
|
2015-08-31 21:23:45 +00:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
retval.append(reinterpret_cast<char*>(mb), c);
|
2015-08-25 07:02:10 +00:00
|
|
|
}
|
|
|
|
return retval;
|
2015-06-09 22:19:59 +00:00
|
|
|
}
|
|
|
|
|
2017-11-13 06:13:53 +00:00
|
|
|
std::string Char16ToUTF8(std::u16string_view src)
|
2017-01-24 07:40:09 +00:00
|
|
|
{
|
|
|
|
std::string retval;
|
|
|
|
retval.reserve(src.length());
|
|
|
|
for (char16_t ch : src)
|
|
|
|
{
|
|
|
|
utf8proc_uint8_t mb[4];
|
|
|
|
utf8proc_ssize_t c = utf8proc_encode_char(utf8proc_int32_t(ch), mb);
|
|
|
|
if (c < 0)
|
|
|
|
{
|
2018-01-10 06:16:18 +00:00
|
|
|
Log.report(logvisor::Warning, "invalid UTF-8 character while encoding");
|
2017-01-24 07:40:09 +00:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
retval.append(reinterpret_cast<char*>(mb), c);
|
|
|
|
}
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2017-11-13 06:13:53 +00:00
|
|
|
std::wstring UTF8ToWide(std::string_view src)
|
2015-06-09 22:19:59 +00:00
|
|
|
{
|
2015-08-25 07:02:10 +00:00
|
|
|
std::wstring retval;
|
|
|
|
retval.reserve(src.length());
|
2017-11-13 06:13:53 +00:00
|
|
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.data());
|
2015-08-25 07:02:10 +00:00
|
|
|
while (*buf)
|
|
|
|
{
|
2015-08-31 21:23:45 +00:00
|
|
|
utf8proc_int32_t wc;
|
|
|
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
|
|
|
if (len < 0)
|
|
|
|
{
|
2018-01-10 06:16:18 +00:00
|
|
|
Log.report(logvisor::Warning, "invalid UTF-8 character while decoding");
|
2015-08-31 21:23:45 +00:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
buf += len;
|
|
|
|
retval += wchar_t(wc);
|
2015-08-25 07:02:10 +00:00
|
|
|
}
|
|
|
|
return retval;
|
2015-06-09 22:19:59 +00:00
|
|
|
}
|
|
|
|
|
2017-11-13 06:13:53 +00:00
|
|
|
std::u16string UTF8ToChar16(std::string_view src)
|
2017-01-24 07:40:09 +00:00
|
|
|
{
|
|
|
|
std::u16string retval;
|
|
|
|
retval.reserve(src.length());
|
2017-11-13 06:13:53 +00:00
|
|
|
const utf8proc_uint8_t* buf = reinterpret_cast<const utf8proc_uint8_t*>(src.data());
|
2017-01-24 07:40:09 +00:00
|
|
|
while (*buf)
|
|
|
|
{
|
|
|
|
utf8proc_int32_t wc;
|
|
|
|
utf8proc_ssize_t len = utf8proc_iterate(buf, -1, &wc);
|
|
|
|
if (len < 0)
|
|
|
|
{
|
2018-01-10 06:16:18 +00:00
|
|
|
Log.report(logvisor::Warning, "invalid UTF-8 character while decoding");
|
2017-01-24 07:40:09 +00:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
buf += len;
|
|
|
|
retval += char16_t(wc);
|
|
|
|
}
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2015-06-09 22:19:59 +00:00
|
|
|
}
|