libneo/include/neo/utf.h
2021-07-16 13:59:11 +02:00

127 lines
4.4 KiB
C

/** See the end of this file for copyright and license terms. */
/**
* @file
* @brief Conversion utilities for raw UTF.
*
* Note that libneo strings already have native UTF-8 support, so you only
* really need this if you explicitly need to deal with different encodings or
* raw bytes, in which case i am very sorry.
*
* Special attention must be paid to functions reading raw UTF from `char *`
* parameters, as they may read a few bytes beyond the memory region allocated
* for them if the string contains a malformed UTF sequence.
*
* Believe me when i say you do not want to use these.
*/
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "neo/_toolchain.h"
#include "neo/_types.h"
/**
* Validate a NUL terminated string as UTF-8 and count its code points.
*
* If the string contains any malformed code sequences, an error is yeeted.
* In that case the return value is undefined and must not be used.
*
* @param s: String to validate
* @param err: Error pointer
* @returns The number of UTF-8 code points (i.e. number of Unicode characters)
* excluding the terminating NUL byte; undefined on error
*/
usize utf8_check(const char *__restrict s, error *err);
/**
* Validate a NUL terminated string as UTF-8 like `utf8_check`, but with an
* upper bound on the number of bytes examined.
*
* At most `maxsize + 3` bytes are read: this function uses `utf8_to_nchr`
* internally, which is documented to read up to 3 bytes past the end of the
* buffer when it encounters a malformed code sequence.
*
* If a NUL terminator is encountered before `maxsize` bytes, reading stops
* before the specified size. If the string contains any malformed code
* sequences, an error is yeeted.
*
* @param s: String to validate
* @param maxsize: Maximum number of bytes to read from `s`
* @param err: Error pointer
* @returns The number of UTF-8 code points (i.e. number of Unicode characters)
* excluding the terminating NUL byte; undefined on error
*/
usize utf8_ncheck(const char *__restrict s, usize maxsize, error *err);
/**
* Compute the length of a raw UTF-8 encoded, NUL terminated string.
*
* The string is *not* checked for malformed code sequences;
* use `utf8_check` if validation is required.
*
* @param s: String to get the length of
* @returns String length in Unicode code points (not bytes),
* excluding the terminating NUL byte
*/
usize utf8_strlen(const char *__restrict s);
/**
* Get the number of bytes a Unicode character takes up in UTF-8.
*
* If the character is outside of the Unicode range (`0x00000000`~`0x0010ffff`),
* an error is yeeted.
*
* @param c: The character
* @param err: Error pointer
* @returns The number of bytes needed to store the character in UTF-8 encoding,
* which is always between 1 and 4 except on errors
*/
usize utf8_chrsize(nchar c, error *err);
/**
* UTF-8 encode a Unicode character and store it in `dest` with NUL terminator.
*
* The buffer needs to hold at least 5 bytes. If the character is outside of
* the Unicode range (`0x00000000`~`0x0010ffff`), an error is yeeted and the
* buffer is not modified.
*
* NOTE(review): the summary above and the 5 byte buffer requirement indicate
* that a NUL terminator is written after the encoded character, yet the `dest`
* parameter has historically been described as "*not* NUL terminated".
* These statements contradict each other; confirm against the implementation
* which one is accurate.
*
* @param dest: Where to store the encoded character (*not* NUL terminated;
* see the review note above)
* @param c: Character to encode
* @param err: Error pointer
* @returns The number of bytes taken up by the character,
* which is always between 1 and 4 except on errors
*/
usize utf8_from_nchr(char *__restrict dest, nchar c, error *err);
/**
* Decode a single UTF-8 encoded character and store it in `c`.
*
* If the character encoding is malformed, an error is yeeted and `c` is set to
* the ASCII NUL character. The encoded character does not need to be NUL
* terminated.
*
* The number of bytes read from the buffer depends on the first byte, which
* marks the beginning of the Unicode code point. Keep in mind that this may
* cause the function to read up to 3 bytes past the end of the buffer if the
* code sequence is malformed.
*
* @param c: Where to store the decoded character
* @param utf8chr: UTF-8 encoded character sequence
* @param err: Error pointer
* @returns The number of bytes the character took up when encoded as UTF-8,
* which is always between 1 and 4 except on errors
*/
usize utf8_to_nchr(nchar *c, const char *__restrict utf8chr, error *err);
#ifdef __cplusplus
}; /* extern "C" */
#endif
/*
* This file is part of libneo.
* Copyright (c) 2021 Fefie <owo@fef.moe>.
*
* libneo is non-violent software: you may only use, redistribute,
* and/or modify it under the terms of the CNPLv6+ as found in
* the LICENSE file in the source code root directory or at
* <https://git.pixie.town/thufie/CNPL>.
*
* libneo comes with ABSOLUTELY NO WARRANTY, to the extent
* permitted by applicable law. See the CNPLv6+ for details.
*/