mirror of
https://github.com/libAthena/athena.git
synced 2025-06-23 23:13:42 +00:00
Remove deprecated cstdbool from utf8proc
This commit is contained in:
parent
4f3531dd1f
commit
b37c3d3dcb
@ -20,8 +20,7 @@
|
|||||||
* DEALINGS IN THE SOFTWARE.
|
* DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
/**
|
|
||||||
* @mainpage
|
* @mainpage
|
||||||
*
|
*
|
||||||
* utf8proc is a free/open-source (MIT/expat licensed) C library
|
* utf8proc is a free/open-source (MIT/expat licensed) C library
|
||||||
@ -36,9 +35,11 @@
|
|||||||
* The features of utf8proc include:
|
* The features of utf8proc include:
|
||||||
*
|
*
|
||||||
* - Transformation of strings (@ref utf8proc_map) to:
|
* - Transformation of strings (@ref utf8proc_map) to:
|
||||||
* - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character)
|
* - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters
|
||||||
|
* (http://en.wikipedia.org/wiki/Combining_character)
|
||||||
* - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
|
* - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
|
||||||
* - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK)
|
* - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining
|
||||||
|
* characters such as accents (@ref UTF8PROC_STRIPMARK)
|
||||||
* - case-folding (@ref UTF8PROC_CASEFOLD)
|
* - case-folding (@ref UTF8PROC_CASEFOLD)
|
||||||
* - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
|
* - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
|
||||||
* - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
|
* - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
|
||||||
@ -53,7 +54,7 @@
|
|||||||
#define UTF8PROC_H
|
#define UTF8PROC_H
|
||||||
|
|
||||||
/** @name API version
|
/** @name API version
|
||||||
*
|
*
|
||||||
* The utf8proc API version MAJOR.MINOR.PATCH, following
|
* The utf8proc API version MAJOR.MINOR.PATCH, following
|
||||||
* semantic-versioning rules (http://semver.org) based on API
|
* semantic-versioning rules (http://semver.org) based on API
|
||||||
* compatibility.
|
* compatibility.
|
||||||
@ -83,26 +84,25 @@ typedef short utf8proc_int16_t;
|
|||||||
typedef unsigned short utf8proc_uint16_t;
|
typedef unsigned short utf8proc_uint16_t;
|
||||||
typedef int utf8proc_int32_t;
|
typedef int utf8proc_int32_t;
|
||||||
typedef unsigned int utf8proc_uint32_t;
|
typedef unsigned int utf8proc_uint32_t;
|
||||||
# ifdef _WIN64
|
#ifdef _WIN64
|
||||||
typedef __int64 utf8proc_ssize_t;
|
typedef __int64 utf8proc_ssize_t;
|
||||||
typedef unsigned __int64 utf8proc_size_t;
|
typedef unsigned __int64 utf8proc_size_t;
|
||||||
# else
|
#else
|
||||||
typedef int utf8proc_ssize_t;
|
typedef int utf8proc_ssize_t;
|
||||||
typedef unsigned int utf8proc_size_t;
|
typedef unsigned int utf8proc_size_t;
|
||||||
# endif
|
#endif
|
||||||
# ifndef __cplusplus
|
#ifndef __cplusplus
|
||||||
typedef unsigned char utf8proc_bool;
|
typedef unsigned char utf8proc_bool;
|
||||||
enum {false, true};
|
enum { false, true };
|
||||||
# else
|
#else
|
||||||
typedef bool utf8proc_bool;
|
typedef bool utf8proc_bool;
|
||||||
# endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
# include <cstdbool>
|
#include <cinttypes>
|
||||||
# include <cinttypes>
|
|
||||||
#else
|
#else
|
||||||
# include <stdbool.h>
|
#include <stdbool.h>
|
||||||
# include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#endif
|
#endif
|
||||||
typedef int8_t utf8proc_int8_t;
|
typedef int8_t utf8proc_int8_t;
|
||||||
typedef uint8_t utf8proc_uint8_t;
|
typedef uint8_t utf8proc_uint8_t;
|
||||||
@ -115,9 +115,9 @@ typedef ssize_t utf8proc_ssize_t;
|
|||||||
typedef bool utf8proc_bool;
|
typedef bool utf8proc_bool;
|
||||||
#endif
|
#endif
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
# include <climits>
|
#include <climits>
|
||||||
#else
|
#else
|
||||||
# include <limits.h>
|
#include <limits.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** @name Error codes
|
/** @name Error codes
|
||||||
@ -136,7 +136,7 @@ typedef bool utf8proc_bool;
|
|||||||
#define UTF8PROC_ERROR_INVALIDOPTS -5
|
#define UTF8PROC_ERROR_INVALIDOPTS -5
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
#define UTF8PROC_cont(ch) (((ch) & 0xc0) == 0x80)
|
#define UTF8PROC_cont(ch) (((ch) & 0xc0) == 0x80)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
||||||
@ -148,14 +148,14 @@ typedef bool utf8proc_bool;
|
|||||||
* In case of success, the number of bytes read is returned; otherwise, a
|
* In case of success, the number of bytes read is returned; otherwise, a
|
||||||
* negative error code is returned.
|
* negative error code is returned.
|
||||||
*/
|
*/
|
||||||
static inline utf8proc_ssize_t utf8proc_iterate(
|
static inline utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t* str, utf8proc_ssize_t strlen,
|
||||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
utf8proc_int32_t* dst) {
|
||||||
) {
|
|
||||||
utf8proc_uint32_t uc;
|
utf8proc_uint32_t uc;
|
||||||
const utf8proc_uint8_t *end;
|
const utf8proc_uint8_t* end;
|
||||||
|
|
||||||
*dst = -1;
|
*dst = -1;
|
||||||
if (!strlen) return 0;
|
if (!strlen)
|
||||||
|
return 0;
|
||||||
end = str + ((strlen < 0) ? 4 : strlen);
|
end = str + ((strlen < 0) ? 4 : strlen);
|
||||||
uc = *str++;
|
uc = *str++;
|
||||||
if (uc < 0x80) {
|
if (uc < 0x80) {
|
||||||
@ -163,36 +163,40 @@ static inline utf8proc_ssize_t utf8proc_iterate(
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
||||||
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if ((uc - 0xc2) > (0xf4 - 0xc2))
|
||||||
if (uc < 0xe0) { // 2-byte sequence
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
// Must have valid continuation character
|
if (uc < 0xe0) { // 2-byte sequence
|
||||||
if (!UTF8PROC_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
// Must have valid continuation character
|
||||||
*dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
|
if (!UTF8PROC_cont(*str))
|
||||||
return 2;
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
|
*dst = ((uc & 0x1f) << 6) | (*str & 0x3f);
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
if (uc < 0xf0) { // 3-byte sequence
|
if (uc < 0xf0) { // 3-byte sequence
|
||||||
if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
|
if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
|
||||||
return UTF8PROC_ERROR_INVALIDUTF8;
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
// Check for surrogate chars
|
// Check for surrogate chars
|
||||||
if (uc == 0xed && *str > 0x9f)
|
if (uc == 0xed && *str > 0x9f)
|
||||||
return UTF8PROC_ERROR_INVALIDUTF8;
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
|
uc = ((uc & 0xf) << 12) | ((*str & 0x3f) << 6) | (str[1] & 0x3f);
|
||||||
if (uc < 0x800)
|
if (uc < 0x800)
|
||||||
return UTF8PROC_ERROR_INVALIDUTF8;
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
*dst = uc;
|
*dst = uc;
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
// 4-byte sequence
|
// 4-byte sequence
|
||||||
// Must have 3 valid continuation characters
|
// Must have 3 valid continuation characters
|
||||||
if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
|
if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
|
||||||
return UTF8PROC_ERROR_INVALIDUTF8;
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
// Make sure in correct range (0x10000 - 0x10ffff)
|
// Make sure in correct range (0x10000 - 0x10ffff)
|
||||||
if (uc == 0xf0) {
|
if (uc == 0xf0) {
|
||||||
if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (*str < 0x90)
|
||||||
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
} else if (uc == 0xf4) {
|
} else if (uc == 0xf4) {
|
||||||
if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (*str > 0x8f)
|
||||||
|
return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
}
|
}
|
||||||
*dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
|
*dst = ((uc & 7) << 18) | ((*str & 0x3f) << 12) | ((str[1] & 0x3f) << 6) | (str[2] & 0x3f);
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,7 +209,7 @@ static inline utf8proc_ssize_t utf8proc_iterate(
|
|||||||
*
|
*
|
||||||
* This function does not check whether `codepoint` is valid Unicode.
|
* This function does not check whether `codepoint` is valid Unicode.
|
||||||
*/
|
*/
|
||||||
static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t* dst) {
|
||||||
if (uc < 0x00) {
|
if (uc < 0x00) {
|
||||||
return 0;
|
return 0;
|
||||||
} else if (uc < 0x80) {
|
} else if (uc < 0x80) {
|
||||||
@ -215,8 +219,8 @@ static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8pro
|
|||||||
dst[0] = 0xC0 + (uc >> 6);
|
dst[0] = 0xC0 + (uc >> 6);
|
||||||
dst[1] = 0x80 + (uc & 0x3F);
|
dst[1] = 0x80 + (uc & 0x3F);
|
||||||
return 2;
|
return 2;
|
||||||
// Note: we allow encoding 0xd800-0xdfff here, so as not to change
|
// Note: we allow encoding 0xd800-0xdfff here, so as not to change
|
||||||
// the API, however, these are actually invalid in UTF-8
|
// the API, however, these are actually invalid in UTF-8
|
||||||
} else if (uc < 0x10000) {
|
} else if (uc < 0x10000) {
|
||||||
dst[0] = 0xE0 + (uc >> 12);
|
dst[0] = 0xE0 + (uc >> 12);
|
||||||
dst[1] = 0x80 + ((uc >> 6) & 0x3F);
|
dst[1] = 0x80 + ((uc >> 6) & 0x3F);
|
||||||
@ -228,81 +232,69 @@ static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8pro
|
|||||||
dst[2] = 0x80 + ((uc >> 6) & 0x3F);
|
dst[2] = 0x80 + ((uc >> 6) & 0x3F);
|
||||||
dst[3] = 0x80 + (uc & 0x3F);
|
dst[3] = 0x80 + (uc & 0x3F);
|
||||||
return 4;
|
return 4;
|
||||||
} else return 0;
|
} else
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class UTF8Iterator
|
class UTF8Iterator {
|
||||||
{
|
std::string_view::const_iterator m_it;
|
||||||
std::string_view::const_iterator m_it;
|
|
||||||
public:
|
|
||||||
using iterator_category = std::forward_iterator_tag;
|
|
||||||
using value_type = uint32_t;
|
|
||||||
using difference_type = std::ptrdiff_t;
|
|
||||||
using pointer = uint32_t*;
|
|
||||||
using reference = uint32_t&;
|
|
||||||
|
|
||||||
UTF8Iterator(const std::string_view::const_iterator& it) : m_it(it) {}
|
public:
|
||||||
UTF8Iterator& operator+=(size_t v)
|
using iterator_category = std::forward_iterator_tag;
|
||||||
{
|
using value_type = uint32_t;
|
||||||
for (size_t i=0 ; i<v ; ++i)
|
using difference_type = std::ptrdiff_t;
|
||||||
{
|
using pointer = uint32_t*;
|
||||||
utf8proc_int32_t dummy;
|
using reference = uint32_t&;
|
||||||
utf8proc_ssize_t sz = utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &dummy);
|
|
||||||
|
UTF8Iterator(const std::string_view::const_iterator& it) : m_it(it) {}
|
||||||
|
UTF8Iterator& operator+=(size_t v) {
|
||||||
|
for (size_t i = 0; i < v; ++i) {
|
||||||
|
utf8proc_int32_t dummy;
|
||||||
|
utf8proc_ssize_t sz = utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &dummy);
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (*m_it == '\0')
|
if (*m_it == '\0') {
|
||||||
{
|
fprintf(stderr, "ERROR! UTF8-iterator null-term fail\n");
|
||||||
fprintf(stderr, "ERROR! UTF8-iterator null-term fail\n");
|
abort();
|
||||||
abort();
|
} else if (sz > 0)
|
||||||
}
|
m_it += sz;
|
||||||
else if (sz > 0)
|
else {
|
||||||
m_it += sz;
|
fprintf(stderr, "ERROR! UTF8Iterator character fail");
|
||||||
else
|
abort();
|
||||||
{
|
}
|
||||||
fprintf(stderr, "ERROR! UTF8Iterator character fail");
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
if (sz > 0)
|
if (sz > 0)
|
||||||
m_it += sz;
|
m_it += sz;
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
UTF8Iterator& operator++()
|
return *this;
|
||||||
{
|
}
|
||||||
return this->operator+=(1);
|
UTF8Iterator& operator++() { return this->operator+=(1); }
|
||||||
}
|
UTF8Iterator operator+(size_t v) const {
|
||||||
UTF8Iterator operator+(size_t v) const
|
UTF8Iterator ret(m_it);
|
||||||
{
|
ret += v;
|
||||||
UTF8Iterator ret(m_it);
|
return ret;
|
||||||
ret += v;
|
}
|
||||||
return ret;
|
uint32_t operator*() const {
|
||||||
}
|
utf8proc_int32_t ret;
|
||||||
uint32_t operator*() const
|
utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &ret);
|
||||||
{
|
return ret;
|
||||||
utf8proc_int32_t ret;
|
}
|
||||||
utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &ret);
|
std::string_view::const_iterator iter() const { return m_it; }
|
||||||
return ret;
|
size_t countTo(std::string_view::const_iterator end) const {
|
||||||
}
|
UTF8Iterator it(m_it);
|
||||||
std::string_view::const_iterator iter() const {return m_it;}
|
size_t ret = 0;
|
||||||
size_t countTo(std::string_view::const_iterator end) const
|
while (it.iter() < end && *it != '\0') {
|
||||||
{
|
++ret;
|
||||||
UTF8Iterator it(m_it);
|
++it;
|
||||||
size_t ret = 0;
|
|
||||||
while (it.iter() < end && *it != '\0')
|
|
||||||
{
|
|
||||||
++ret;
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user