GFXprim
2D bitmap graphics library with emphasis on speed and correctness
Loading...
Searching...
No Matches
Macros | Functions
gp_utf.h File Reference

Unicode helper macros and functions. More...

#include <stdint.h>
#include <stddef.h>

Go to the source code of this file.

Macros

#define GP_UTF8_IS_ASCII(ch)   (!((ch) & 0x80))
 
#define GP_UTF8_IS_NBYTE(ch)   (((ch) & 0xc0) == 0x80)
 
#define GP_UTF8_IS_2BYTE(ch)   (((ch) & 0xe0) == 0xc0)
 
#define GP_UTF8_IS_3BYTE(ch)   (((ch) & 0xf0) == 0xe0)
 
#define GP_UTF8_IS_4BYTE(ch)   (((ch) & 0xf8) == 0xf0)
 

Functions

static uint32_t gp_utf8_next (const char **str)
 Parses next unicode character in UTF-8 string.
 
int8_t gp_utf8_next_chsz (const char *str, size_t off)
 Returns number of bytes next character is occupying in an UTF-8 string.
 
int8_t gp_utf8_prev_chsz (const char *str, size_t off)
 Returns number of bytes previous character is occupying in an UTF-8 string.
 
size_t gp_utf8_strlen (const char *str)
 Returns a number of characters in UTF-8 string.
 
static unsigned int gp_utf8_bytes (uint32_t unicode)
 Returns a number of bytes needed to store unicode character into UTF-8.
 
static int gp_to_utf8 (uint32_t unicode, char *buf)
 Writes an unicode character into a UTF-8 buffer.
 
uint32_t gp_utf_fallback (uint32_t ch)
 Attempts to strip diacritics from an unicode character.
 

Detailed Description

Unicode helper macros and functions.

Definition in file gp_utf.h.

Macro Definition Documentation

◆ GP_UTF8_IS_2BYTE

#define GP_UTF8_IS_2BYTE (   ch)    (((ch) & 0xe0) == 0xc0)

Returns true if the byte is the first byte of a two-byte UTF-8 sequence.

Definition at line 22 of file gp_utf.h.

◆ GP_UTF8_IS_3BYTE

#define GP_UTF8_IS_3BYTE (   ch)    (((ch) & 0xf0) == 0xe0)

Returns true if the byte is the first byte of a three-byte UTF-8 sequence.

Definition at line 24 of file gp_utf.h.

◆ GP_UTF8_IS_4BYTE

#define GP_UTF8_IS_4BYTE (   ch)    (((ch) & 0xf8) == 0xf0)

Returns true if the byte is the first byte of a four-byte UTF-8 sequence.

Definition at line 26 of file gp_utf.h.

◆ GP_UTF8_IS_ASCII

#define GP_UTF8_IS_ASCII (   ch)    (!((ch) & 0x80))

Returns true if the byte is an ASCII character, i.e. a single-byte UTF-8 sequence.

Definition at line 18 of file gp_utf.h.

◆ GP_UTF8_IS_NBYTE

#define GP_UTF8_IS_NBYTE (   ch)    (((ch) & 0xc0) == 0x80)

Returns true if the byte is a UTF-8 continuation byte, i.e. a non-first byte of a multibyte sequence (bit pattern 10xxxxxx).

Definition at line 20 of file gp_utf.h.

Function Documentation

◆ gp_to_utf8()

static int gp_to_utf8 ( uint32_t  unicode,
char *  buf 
)
inlinestatic

Writes a unicode character into a UTF-8 buffer.

The buffer must be large enough!

Parameters
unicode: A unicode character.
buf: A byte buffer.
Returns
A number of bytes written.

Definition at line 141 of file gp_utf.h.

Referenced by gp_vec_ins_utf8().

◆ gp_utf8_bytes()

static unsigned int gp_utf8_bytes ( uint32_t  unicode)
inlinestatic

Returns a number of bytes needed to store unicode character into UTF-8.

Parameters
unicode: A unicode character.
Returns
Number of utf8 bytes required to store a unicode character.

Definition at line 118 of file gp_utf.h.

Referenced by gp_vec_ins_utf8().

◆ gp_utf8_next()

static uint32_t gp_utf8_next ( const char **  str)
inlinestatic

Parses next unicode character in UTF-8 string.

Parameters
str: A pointer to the C string.
Returns
A unicode character or 0 on error or end of the string.

Definition at line 35 of file gp_utf.h.

References GP_UTF8_IS_2BYTE, GP_UTF8_IS_3BYTE, GP_UTF8_IS_4BYTE, GP_UTF8_IS_ASCII, and GP_UTF8_IS_NBYTE.

Referenced by gp_utf8_pos_next(), and gp_utf8_pos_prev().

◆ gp_utf8_next_chsz()

int8_t gp_utf8_next_chsz ( const char *  str,
size_t  off 
)

Returns number of bytes next character is occupying in an UTF-8 string.

Parameters
str: A pointer to a string.
off: An offset into the string, must point to a valid multibyte boundary.
Returns
Number of bytes next character occupies, zero on string end and -1 on failure.

Referenced by gp_utf8_pos_last(), gp_utf8_pos_move(), and gp_utf8_pos_next().

◆ gp_utf8_prev_chsz()

int8_t gp_utf8_prev_chsz ( const char *  str,
size_t  off 
)

Returns number of bytes previous character is occupying in an UTF-8 string.

Parameters
str: A pointer to a string.
off: An offset into the string, must point to a valid multibyte boundary.
Returns
Number of bytes previous character occupies, and -1 on failure.

Referenced by gp_utf8_pos_move(), and gp_utf8_pos_prev().

◆ gp_utf8_strlen()

size_t gp_utf8_strlen ( const char *  str)

Returns a number of characters in UTF-8 string.

Returns the number of characters in a UTF-8 string, which may be less than or equal to what strlen() reports.

Parameters
str: A UTF-8 string.
Returns
Number of characters in the string.

◆ gp_utf_fallback()

uint32_t gp_utf_fallback ( uint32_t  ch)

Attempts to strip diacritics from an unicode character.

Attempts to strip diacritics and to replace symbols with ones of similar meaning, producing text that is still readable even with an ASCII-only font.

Parameters
ch: A unicode character.
Returns
A replacement for the character with the same meaning, or the original character if no mapping was found.