Remove deprecated <cstdbool> include from utf8proc.h and reformat the header

2025-12-08 13:15:05 +00:00 · 2025-05-01 19:26:15 -07:00
parent 4f3531dd1f
commit b37c3d3dcb
1 changed files with 101 additions and 109 deletions
--- a/include/athena/utf8proc.h
+++ b/include/athena/utf8proc.h
@@ -20,8 +20,7 @@
 * DEALINGS IN THE SOFTWARE.
 */

-
-/** 
+/**
 * @mainpage
 *
 * utf8proc is a free/open-source (MIT/expat licensed) C library
@@ -36,9 +35,11 @@
 * The features of utf8proc include:
 *
 * - Transformation of strings (@ref utf8proc_map) to:
- *    - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters (http://en.wikipedia.org/wiki/Combining_character)
+ *    - decompose (@ref UTF8PROC_DECOMPOSE) or compose (@ref UTF8PROC_COMPOSE) Unicode combining characters
+ *      (http://en.wikipedia.org/wiki/Combining_character)
 *    - canonicalize Unicode compatibility characters (@ref UTF8PROC_COMPAT)
- *    - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining characters such as accents (@ref UTF8PROC_STRIPMARK)
+ *    - strip "ignorable" (@ref UTF8PROC_IGNORE) characters, control characters (@ref UTF8PROC_STRIPCC), or combining
+ *      characters such as accents (@ref UTF8PROC_STRIPMARK)
 *    - case-folding (@ref UTF8PROC_CASEFOLD)
 * - Unicode normalization: @ref utf8proc_NFD, @ref utf8proc_NFC, @ref utf8proc_NFKD, @ref utf8proc_NFKC
 * - Detecting grapheme boundaries (@ref utf8proc_grapheme_break and @ref UTF8PROC_CHARBOUND)
@@ -53,7 +54,7 @@
 #define UTF8PROC_H

 /** @name API version
- *  
+ *
 * The utf8proc API version MAJOR.MINOR.PATCH, following
 * semantic-versioning rules (http://semver.org) based on API
 * compatibility.
@@ -83,26 +84,25 @@ typedef short utf8proc_int16_t;
 typedef unsigned short utf8proc_uint16_t;
 typedef int utf8proc_int32_t;
 typedef unsigned int utf8proc_uint32_t;
-#  ifdef _WIN64
+#ifdef _WIN64
 typedef __int64 utf8proc_ssize_t;
 typedef unsigned __int64 utf8proc_size_t;
-#  else
+#else
 typedef int utf8proc_ssize_t;
 typedef unsigned int utf8proc_size_t;
-#  endif
-#  ifndef __cplusplus
+#endif
+#ifndef __cplusplus
 typedef unsigned char utf8proc_bool;
-enum {false, true};
-#  else
+enum { false, true };
+#else
 typedef bool utf8proc_bool;
-#  endif
+#endif
 #else
 #ifdef __cplusplus
-#  include <cstdbool>
-#  include <cinttypes>
+#include <cinttypes>
 #else
-#  include <stdbool.h>
-#  include <inttypes.h>
+#include <stdbool.h>
+#include <inttypes.h>
 #endif
 typedef int8_t utf8proc_int8_t;
 typedef uint8_t utf8proc_uint8_t;
@@ -115,9 +115,9 @@ typedef ssize_t utf8proc_ssize_t;
 typedef bool utf8proc_bool;
 #endif
 #ifdef __cplusplus
-#  include <climits>
+#include <climits>
 #else
-#  include <limits.h>
+#include <limits.h>
 #endif

 /** @name Error codes
@@ -136,7 +136,7 @@ typedef bool utf8proc_bool;
 #define UTF8PROC_ERROR_INVALIDOPTS -5
 /** @} */

-#define UTF8PROC_cont(ch)  (((ch) & 0xc0) == 0x80)
+#define UTF8PROC_cont(ch) (((ch) & 0xc0) == 0x80)

 /**
 * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
@@ -148,14 +148,14 @@ typedef bool utf8proc_bool;
 * In case of success, the number of bytes read is returned; otherwise, a
 * negative error code is returned.
 */
-static inline utf8proc_ssize_t utf8proc_iterate(
-  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
-) {
+static inline utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t* str, utf8proc_ssize_t strlen,
+                                                utf8proc_int32_t* dst) {
  utf8proc_uint32_t uc;
-  const utf8proc_uint8_t *end;
+  const utf8proc_uint8_t* end;

  *dst = -1;
-  if (!strlen) return 0;
+  if (!strlen)
+    return 0;
  end = str + ((strlen < 0) ? 4 : strlen);
  uc = *str++;
  if (uc < 0x80) {
@@ -163,36 +163,40 @@ static inline utf8proc_ssize_t utf8proc_iterate(
    return 1;
  }
  // Must be between 0xc2 and 0xf4 inclusive to be valid
-  if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
-  if (uc < 0xe0) {         // 2-byte sequence
-     // Must have valid continuation character
-     if (!UTF8PROC_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
-     *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
-     return 2;
+  if ((uc - 0xc2) > (0xf4 - 0xc2))
+    return UTF8PROC_ERROR_INVALIDUTF8;
+  if (uc < 0xe0) { // 2-byte sequence
+    // Must have valid continuation character
+    if (!UTF8PROC_cont(*str))
+      return UTF8PROC_ERROR_INVALIDUTF8;
+    *dst = ((uc & 0x1f) << 6) | (*str & 0x3f);
+    return 2;
  }
-  if (uc < 0xf0) {        // 3-byte sequence
-     if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
-        return UTF8PROC_ERROR_INVALIDUTF8;
-     // Check for surrogate chars
-     if (uc == 0xed && *str > 0x9f)
-         return UTF8PROC_ERROR_INVALIDUTF8;
-     uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
-     if (uc < 0x800)
-         return UTF8PROC_ERROR_INVALIDUTF8;
-     *dst = uc;
-     return 3;
+  if (uc < 0xf0) { // 3-byte sequence
+    if ((str + 1 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]))
+      return UTF8PROC_ERROR_INVALIDUTF8;
+    // Check for surrogate chars
+    if (uc == 0xed && *str > 0x9f)
+      return UTF8PROC_ERROR_INVALIDUTF8;
+    uc = ((uc & 0xf) << 12) | ((*str & 0x3f) << 6) | (str[1] & 0x3f);
+    if (uc < 0x800)
+      return UTF8PROC_ERROR_INVALIDUTF8;
+    *dst = uc;
+    return 3;
  }
  // 4-byte sequence
  // Must have 3 valid continuation characters
  if ((str + 2 >= end) || !UTF8PROC_cont(*str) || !UTF8PROC_cont(str[1]) || !UTF8PROC_cont(str[2]))
-     return UTF8PROC_ERROR_INVALIDUTF8;
+    return UTF8PROC_ERROR_INVALIDUTF8;
  // Make sure in correct range (0x10000 - 0x10ffff)
  if (uc == 0xf0) {
-    if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
+    if (*str < 0x90)
+      return UTF8PROC_ERROR_INVALIDUTF8;
  } else if (uc == 0xf4) {
-    if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
+    if (*str > 0x8f)
+      return UTF8PROC_ERROR_INVALIDUTF8;
  }
-  *dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
+  *dst = ((uc & 7) << 18) | ((*str & 0x3f) << 12) | ((str[1] & 0x3f) << 6) | (str[2] & 0x3f);
  return 4;
 }

@@ -205,7 +209,7 @@ static inline utf8proc_ssize_t utf8proc_iterate(
 *
 * This function does not check whether `codepoint` is valid Unicode.
 */
-static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
+static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t* dst) {
  if (uc < 0x00) {
    return 0;
  } else if (uc < 0x80) {
@@ -215,8 +219,8 @@ static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8pro
    dst[0] = 0xC0 + (uc >> 6);
    dst[1] = 0x80 + (uc & 0x3F);
    return 2;
-  // Note: we allow encoding 0xd800-0xdfff here, so as not to change
-  // the API, however, these are actually invalid in UTF-8
+    // Note: we allow encoding 0xd800-0xdfff here, so as not to change
+    // the API, however, these are actually invalid in UTF-8
  } else if (uc < 0x10000) {
    dst[0] = 0xE0 + (uc >> 12);
    dst[1] = 0x80 + ((uc >> 6) & 0x3F);
@@ -228,81 +232,69 @@ static inline utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8pro
    dst[2] = 0x80 + ((uc >> 6) & 0x3F);
    dst[3] = 0x80 + (uc & 0x3F);
    return 4;
-  } else return 0;
+  } else
+    return 0;
 }

 #ifdef __cplusplus
 #include <iterator>
 #include <string>

-class UTF8Iterator
-{
-    std::string_view::const_iterator m_it;
-public:
-    using iterator_category = std::forward_iterator_tag;
-    using value_type = uint32_t;
-    using difference_type = std::ptrdiff_t;
-    using pointer = uint32_t*;
-    using reference = uint32_t&;
+class UTF8Iterator {
+  std::string_view::const_iterator m_it;

-    UTF8Iterator(const std::string_view::const_iterator& it) : m_it(it) {}
-    UTF8Iterator& operator+=(size_t v)
-    {
-        for (size_t i=0 ; i<v ; ++i)
-        {
-            utf8proc_int32_t dummy;
-            utf8proc_ssize_t sz = utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &dummy);
+public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = uint32_t;
+  using difference_type = std::ptrdiff_t;
+  using pointer = uint32_t*;
+  using reference = uint32_t&;
+
+  UTF8Iterator(const std::string_view::const_iterator& it) : m_it(it) {}
+  UTF8Iterator& operator+=(size_t v) {
+    for (size_t i = 0; i < v; ++i) {
+      utf8proc_int32_t dummy;
+      utf8proc_ssize_t sz = utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &dummy);
 #ifndef NDEBUG
-            if (*m_it == '\0')
-            {
-                fprintf(stderr, "ERROR! UTF8-iterator null-term fail\n");
-                abort();
-            }
-            else if (sz > 0)
-                m_it += sz;
-            else
-            {
-                fprintf(stderr, "ERROR! UTF8Iterator character fail");
-                abort();
-            }
+      if (*m_it == '\0') {
+        fprintf(stderr, "ERROR! UTF8-iterator null-term fail\n");
+        abort();
+      } else if (sz > 0)
+        m_it += sz;
+      else {
+        fprintf(stderr, "ERROR! UTF8Iterator character fail");
+        abort();
+      }
 #else
-            if (sz > 0)
-                m_it += sz;
+      if (sz > 0)
+        m_it += sz;
 #endif
-        }
-        return *this;
    }
-    UTF8Iterator& operator++()
-    {
-        return this->operator+=(1);
-    }
-    UTF8Iterator operator+(size_t v) const
-    {
-        UTF8Iterator ret(m_it);
-        ret += v;
-        return ret;
-    }
-    uint32_t operator*() const
-    {
-        utf8proc_int32_t ret;
-        utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &ret);
-        return ret;
-    }
-    std::string_view::const_iterator iter() const {return m_it;}
-    size_t countTo(std::string_view::const_iterator end) const
-    {
-        UTF8Iterator it(m_it);
-        size_t ret = 0;
-        while (it.iter() < end && *it != '\0')
-        {
-            ++ret;
-            ++it;
-        }
-        return ret;
+    return *this;
+  }
+  UTF8Iterator& operator++() { return this->operator+=(1); }
+  UTF8Iterator operator+(size_t v) const {
+    UTF8Iterator ret(m_it);
+    ret += v;
+    return ret;
+  }
+  uint32_t operator*() const {
+    utf8proc_int32_t ret;
+    utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(&*m_it), -1, &ret);
+    return ret;
+  }
+  std::string_view::const_iterator iter() const { return m_it; }
+  size_t countTo(std::string_view::const_iterator end) const {
+    UTF8Iterator it(m_it);
+    size_t ret = 0;
+    while (it.iter() < end && *it != '\0') {
+      ++ret;
+      ++it;
    }
+    return ret;
+  }
 };

 #endif

 #endif
-