GFXprim
2D bitmap graphics library with emphasis on speed and correctness
Loading...
Searching...
No Matches
gp_utf.h
Go to the documentation of this file.
1// SPDX-License-Identifier: LGPL-2.1-or-later
2/*
3 * Copyright (C) 2022 Cyril Hrubis <metan@ucw.cz>
4 */
5
11#ifndef UTILS_GP_UTF_H
12#define UTILS_GP_UTF_H
13
14#include <stdint.h>
15#include <stddef.h>
16
/** @brief Non-zero if the byte has bit 7 clear, i.e. is a single-byte (ASCII) character. */
#define GP_UTF8_IS_ASCII(ch) (!((ch) & 0x80))
/** @brief Non-zero if the byte is a continuation byte (bit pattern 10xxxxxx). */
#define GP_UTF8_IS_NBYTE(ch) (((ch) & 0xc0) == 0x80)
/** @brief Non-zero if the byte is a lead byte of a two-byte sequence (110xxxxx). */
#define GP_UTF8_IS_2BYTE(ch) (((ch) & 0xe0) == 0xc0)
/** @brief Non-zero if the byte is a lead byte of a three-byte sequence (1110xxxx). */
#define GP_UTF8_IS_3BYTE(ch) (((ch) & 0xf0) == 0xe0)
/** @brief Non-zero if the byte is a lead byte of a four-byte sequence (11110xxx). */
#define GP_UTF8_IS_4BYTE(ch) (((ch) & 0xf8) == 0xf0)

/** @brief Mask selecting the six payload bits of a continuation byte. */
#define GP_UTF8_NBYTE_MASK 0x3f

/**
 * @brief Parses next unicode character in UTF-8 string.
 *
 * Decodes one character starting at *str and moves *str behind the bytes that
 * were consumed.
 *
 * Zero is returned when the first byte is the terminating NUL (the pointer is
 * advanced past it in that case as well), when an expected continuation byte
 * is missing, or when the first byte is not a valid ASCII/2/3/4-byte lead.
 * When a continuation byte is missing the pointer is left pointing at the
 * offending byte, so the terminating NUL is never skipped in the middle of a
 * sequence.
 *
 * No checks for overlong encodings, surrogates or values above 0x10ffff are
 * done.
 *
 * @param str A pointer to a position in a NUL terminated UTF-8 string, updated
 *            on return.
 * @return A unicode code point or 0 on the end of the string or on an invalid
 *         sequence.
 */
static inline uint32_t gp_utf8_next(const char **str)
{
	/* Bytes are read as unsigned so that plain char signedness does not matter. */
	uint32_t s0 = (unsigned char)**str;

	(*str)++;

	if (GP_UTF8_IS_ASCII(s0))
		return s0;

	uint32_t s1 = (unsigned char)**str;

	if (!GP_UTF8_IS_NBYTE(s1))
		return 0;

	s1 &= GP_UTF8_NBYTE_MASK;

	(*str)++;

	if (GP_UTF8_IS_2BYTE(s0))
		return (s0 & 0x1f)<<6 | s1;

	uint32_t s2 = (unsigned char)**str;

	if (!GP_UTF8_IS_NBYTE(s2))
		return 0;

	s2 &= GP_UTF8_NBYTE_MASK;

	(*str)++;

	if (GP_UTF8_IS_3BYTE(s0))
		return (s0 & 0x0f)<<12 | s1<<6 | s2;

	/*
	 * The fourth byte has to be read before the pointer is advanced and it
	 * is the fourth byte, not the already masked third one, that has to be
	 * validated.
	 */
	uint32_t s3 = (unsigned char)**str;

	if (!GP_UTF8_IS_NBYTE(s3))
		return 0;

	s3 &= GP_UTF8_NBYTE_MASK;

	(*str)++;

	if (GP_UTF8_IS_4BYTE(s0))
		return (s0 & 0x07)<<18 | s1<<12 | s2<<6 | s3;

	return 0;
}
82
/**
 * @brief Returns the number of bytes the next character occupies in a UTF-8
 *        string.
 *
 * NOTE(review): off is presumably a byte offset into str and the signed return
 * type suggests a negative value on invalid input; the implementation is not
 * visible in this header, confirm against the source file.
 *
 * @param str A UTF-8 string.
 * @param off An offset into the string.
 * @return Size of the next character in bytes.
 */
90int8_t gp_utf8_next_chsz(const char *str, size_t off);
91
/**
 * @brief Returns the number of bytes the previous character occupies in a
 *        UTF-8 string.
 *
 * NOTE(review): off is presumably a byte offset into str and the signed return
 * type suggests a negative value on invalid input; the implementation is not
 * visible in this header, confirm against the source file.
 *
 * @param str A UTF-8 string.
 * @param off An offset into the string.
 * @return Size of the previous character in bytes.
 */
99int8_t gp_utf8_prev_chsz(const char *str, size_t off);
100
/**
 * @brief Returns the number of characters in a UTF-8 string.
 *
 * NOTE(review): how invalid sequences are counted is not visible in this
 * header, confirm against the implementation.
 *
 * @param str A UTF-8 string.
 * @return Number of characters in the string.
 */
110size_t gp_utf8_strlen(const char *str);
111
/**
 * @brief Returns the number of bytes needed to store a unicode character in
 *        UTF-8.
 *
 * The length starts at one byte and grows by one for each encoding boundary
 * (0x80, 0x800, 0x10000) the code point reaches. Anything at or above 0x10000,
 * including values outside of the unicode range, yields four.
 *
 * @param unicode A unicode code point.
 * @return Number of bytes, 1 to 4.
 */
static inline unsigned int gp_utf8_bytes(uint32_t unicode)
{
	unsigned int len = 1;

	len += (unicode >= 0x0080);
	len += (unicode >= 0x0800);
	len += (unicode >= 0x10000);

	return len;
}
131
/**
 * @brief Writes a unicode character into a UTF-8 buffer.
 *
 * The sequence length is chosen by the code point range, the continuation
 * bytes are filled from the last one towards the first, six bits at a time,
 * and the remaining high bits are stored in the lead byte.
 *
 * No NUL terminator is written and the code point is not validated, bits that
 * do not fit into a four-byte sequence are dropped.
 *
 * @param unicode A unicode code point.
 * @param buf A buffer with room for the encoded sequence, at most four bytes
 *            are written.
 * @return Number of bytes written, 1 to 4.
 */
static inline int gp_to_utf8(uint32_t unicode, char *buf)
{
	uint32_t lead_tag;
	uint32_t lead_mask;
	int len;
	int i;

	if (unicode < 0x0080) {
		lead_tag = 0x00;
		lead_mask = 0x7f;
		len = 1;
	} else if (unicode < 0x0800) {
		lead_tag = 0xc0;
		lead_mask = 0x1f;
		len = 2;
	} else if (unicode < 0x10000) {
		lead_tag = 0xe0;
		lead_mask = 0x0f;
		len = 3;
	} else {
		lead_tag = 0xf0;
		lead_mask = 0x07;
		len = 4;
	}

	/* Continuation bytes carry the low bits, last byte first. */
	for (i = len - 1; i > 0; i--) {
		buf[i] = 0x80 | (0x3f & unicode);
		unicode >>= 6;
	}

	/* Whatever is left after the shifts belongs to the lead byte. */
	buf[0] = lead_tag | (lead_mask & unicode);

	return len;
}
167
/**
 * @brief Attempts to strip diacritics from a unicode character.
 *
 * NOTE(review): what is returned when no fallback exists for ch is not visible
 * in this header, confirm against the implementation.
 *
 * @param ch A unicode character.
 * @return A unicode character, presumably the base character without
 *         diacritics.
 */
178uint32_t gp_utf_fallback(uint32_t ch);
179
180#endif /* UTILS_GP_UTF_H */
#define GP_UTF8_IS_2BYTE(ch)
Definition gp_utf.h:22
#define GP_UTF8_IS_4BYTE(ch)
Definition gp_utf.h:26
static uint32_t gp_utf8_next(const char **str)
Parses the next Unicode character in a UTF-8 string.
Definition gp_utf.h:35
#define GP_UTF8_IS_3BYTE(ch)
Definition gp_utf.h:24
static unsigned int gp_utf8_bytes(uint32_t unicode)
Returns the number of bytes needed to store a Unicode character in UTF-8.
Definition gp_utf.h:118
static int gp_to_utf8(uint32_t unicode, char *buf)
Writes a Unicode character into a UTF-8 buffer.
Definition gp_utf.h:141
#define GP_UTF8_IS_NBYTE(ch)
Definition gp_utf.h:20
size_t gp_utf8_strlen(const char *str)
Returns the number of characters in a UTF-8 string.
uint32_t gp_utf_fallback(uint32_t ch)
Attempts to strip diacritics from a Unicode character.
int8_t gp_utf8_prev_chsz(const char *str, size_t off)
Returns the number of bytes the previous character occupies in a UTF-8 string.
int8_t gp_utf8_next_chsz(const char *str, size_t off)
Returns the number of bytes the next character occupies in a UTF-8 string.
#define GP_UTF8_IS_ASCII(ch)
Definition gp_utf.h:18