/* Non-zero when bit 7 of ch is clear, i.e. ch is a single-byte (ASCII) UTF-8 character. */
18#define GP_UTF8_IS_ASCII(ch) (!((ch) & 0x80))
/* Non-zero when the top two bits of ch are 10, i.e. ch is a UTF-8 continuation (non-leading) byte. */
20#define GP_UTF8_IS_NBYTE(ch) (((ch) & 0xc0) == 0x80)
/* Non-zero when the top three bits of ch are 110, i.e. ch is the first byte of a two-byte UTF-8 sequence. */
22#define GP_UTF8_IS_2BYTE(ch) (((ch) & 0xe0) == 0xc0)
/* Non-zero when the top four bits of ch are 1110, i.e. ch is the first byte of a three-byte UTF-8 sequence. */
24#define GP_UTF8_IS_3BYTE(ch) (((ch) & 0xf0) == 0xe0)
/* Non-zero when the top five bits of ch are 11110, i.e. ch is the first byte of a four-byte UTF-8 sequence. */
26#define GP_UTF8_IS_4BYTE(ch) (((ch) & 0xf8) == 0xf0)
/* Mask selecting the low six bits of a byte; applied to continuation bytes to extract their payload bits. */
28#define GP_UTF8_NBYTE_MASK 0x3f
37 uint32_t s0 = *str[0];
44 uint32_t s1 = *str[0];
49 s1 &= GP_UTF8_NBYTE_MASK;
54 return (s0 & 0x1f)<<6 | s1;
56 uint32_t s2 = *str[0];
61 s2 &= GP_UTF8_NBYTE_MASK;
66 return (s0 & 0x0f)<<12 | s1<<6 | s2;
70 uint32_t s3 = *str[0];
75 s3 &= GP_UTF8_NBYTE_MASK;
78 return (s0 & 0x07)<<18 | s1<<12 | s2<<6 | s3;
120 if (unicode < 0x0080)
123 if (unicode < 0x0800)
126 if (unicode < 0x10000)
143 if (unicode < 0x0080) {
144 buf[0] = unicode & 0x007f;
148 if (unicode < 0x0800) {
149 buf[0] = 0xc0 | (0x1f & (unicode>>6));
150 buf[1] = 0x80 | (0x3f & unicode);
154 if (unicode < 0x10000) {
155 buf[0] = 0xe0 | (0x0f & (unicode>>12));
156 buf[1] = 0x80 | (0x3f & (unicode>>6));
157 buf[2] = 0x80 | (0x3f & unicode);
161 buf[0] = 0xf0 | (0x07 & (unicode>>18));
162 buf[1] = 0x80 | (0x3f & (unicode>>12));
163 buf[2] = 0x80 | (0x3f & (unicode>>6));
164 buf[3] = 0x80 | (0x3f & unicode);
#define GP_UTF8_IS_2BYTE(ch)
#define GP_UTF8_IS_4BYTE(ch)
static uint32_t gp_utf8_next(const char **str)
Parses the next Unicode character in a UTF-8 string.
#define GP_UTF8_IS_3BYTE(ch)
static unsigned int gp_utf8_bytes(uint32_t unicode)
Returns the number of bytes needed to store a Unicode character in UTF-8.
static int gp_to_utf8(uint32_t unicode, char *buf)
Writes a Unicode character into a UTF-8 buffer.
#define GP_UTF8_IS_NBYTE(ch)
size_t gp_utf8_strlen(const char *str)
Returns the number of characters in a UTF-8 string.
uint32_t gp_utf_fallback(uint32_t ch)
Attempts to strip diacritics from a Unicode character.
int8_t gp_utf8_prev_chsz(const char *str, size_t off)
Returns the number of bytes the previous character occupies in a UTF-8 string.
int8_t gp_utf8_next_chsz(const char *str, size_t off)
Returns the number of bytes the next character occupies in a UTF-8 string.
#define GP_UTF8_IS_ASCII(ch)