Compare commits

...

3 Commits

@ -40,12 +40,15 @@ int _neo_nput(struct _neo_nref *ref);
* Decrement the reference counter of a structure embedding `NREF_FIELD`.
*
* If the counter reaches zero, the destroy callback passed to `nref_init`
* is invoked.
* is invoked and the pointer is set to `nil` to prevent any further usage.
*
* @param ptr: The `struct *` to decrement the reference counter of
* @returns The new reference count
*/
#define nput(ptr) (_neo_nput( &(ptr)->__neo_nref ))
/*
 * Implemented as a GNU statement expression: the new count returned by
 * `_neo_nput` is stored once, `ptr` is reset to `nil` if that count is zero,
 * and the stored count is the value of the whole expression, so that
 * `nput(ptr)` can still be used wherever the reference count is needed.
 * `ptr` must be a modifiable lvalue because it may be assigned to.
 */
#define nput(ptr) ({ \
	int __neo_nput_count = _neo_nput(&(ptr)->__neo_nref); \
	if (__neo_nput_count == 0) \
		(ptr) = nil; \
	__neo_nput_count; \
})
/*
* This file is part of libneo.

@ -35,6 +35,22 @@ extern "C" {
*/
usize utf8_check(const char *__restrict s, error *err);
/**
* Check whether a NUL terminated string is valid UTF-8, but read at most
* `maxsize + 3` bytes (this function uses `utf8_to_nchr` internally).
*
* If a NUL terminator is encountered before `maxsize` bytes, reading stops
* before the specified size. If the string contains any malformed code
* sequences, an error is yeeted.
*
* @param s: String to validate
* @param maxsize: Maximum number of bytes to read from `s`
* @param err: Error pointer
* @returns The number of UTF-8 code points (i.e. number of Unicode characters)
* excluding the terminating NUL byte; undefined on error
*/
usize utf8_ncheck(const char *__restrict s, usize maxsize, error *err);
/**
* Compute the length of a raw UTF-8 encoded, NUL terminated string.
*

@ -21,7 +21,7 @@ static void nstr_destroy(string *str)
static string *nstr_unsafe(const char *restrict s, usize size_without_nul, error *err)
{
usize len = utf8_check(s, err);
usize len = utf8_ncheck(s, size_without_nul, err);
catch(err) {
return nil;
}

@ -11,13 +11,21 @@
int nstrcmp(const string *s1, const string *s2, error *err)
{
/*
* Return values are always undefined if an error was yeeted so it's not
* strictly necessary to return a correct(ish) result here, but the
* philosophy of libneo is to always behave in a way that is the least
* harmful.
*/
if (s1 == nil) {
yeet(err, EFAULT, "First string is nil");
return 0;
if (s2 == nil)
return 0;
return -1;
}
if (s2 == nil) {
yeet(err, EFAULT, "Second string is nil");
return 0;
return 1;
}
int ret;

@ -30,6 +30,24 @@ usize utf8_check(const char *restrict s, error *err)
return ret;
}
/*
 * Validate a NUL terminated UTF-8 string, inspecting at most `maxsize` bytes
 * worth of sequence starts, and count the code points that were decoded.
 *
 * Every sequence is decoded with `utf8_to_nchr`; if that yeets an error,
 * validation stops immediately and the error is left in `err` for the caller.
 *
 * @param s: String to validate
 * @param maxsize: Maximum number of bytes to consume from `s`
 * @param err: Error pointer
 * @returns The number of successfully decoded code points, excluding the
 *	terminating NUL byte; undefined on error
 */
usize utf8_ncheck(const char *restrict s, usize maxsize, error *err)
{
	usize ret = 0;
	nchar c;

	/*
	 * Test the remaining budget first so that `*s` is never dereferenced
	 * once the budget is used up (including when `maxsize` starts at 0).
	 */
	while (maxsize != 0 && *s != '\0') {
		usize size = utf8_to_nchr(&c, s, err);
		/* don't advance by the result of a decode that failed */
		catch(err) {
			break;
		}

		ret++;
		s += size;

		/*
		 * A multi-byte sequence may extend past the limit.  Subtracting
		 * unconditionally would wrap the unsigned counter around to a
		 * huge value and the loop would keep reading far beyond
		 * `maxsize`, so stop as soon as the budget is exhausted.
		 */
		if (size >= maxsize)
			break;
		maxsize -= size;
	}

	return ret;
}
usize utf8_strlen(const char *restrict s)
{
usize len = 0;

@ -4,6 +4,7 @@ target_sources(neo_test PRIVATE
string/utf/utf8_check.cpp
string/utf/utf8_chrsize.cpp
string/utf/utf8_from_nchr.cpp
string/utf/utf8_ncheck.cpp
string/utf/utf8_strlen.cpp
string/utf/utf8_to_nchr.cpp
)

@ -0,0 +1,138 @@
/** See the end of this file for copyright and license terms. */
#include <catch2/catch.hpp>
#include <errno.h>
#include <neo.h>
#include <neo/utf.h>
/* Pure ASCII input: every byte is its own code point. */
TEST_CASE( "utf8_ncheck: ASCII string", "[string/utf.c]" )
{
	/* 10 bytes, 10 code points; the limit covers the entire string */
	static const char input[] = "i'm gay,,,";
	error err;

	usize count = utf8_ncheck(input, sizeof(input) - 1, &err);

	REQUIRE( count == 10 );
	REQUIRE( errnum(&err) == 0 );
	REQUIRE( errmsg(&err) == nil );
}
/* A 2-byte sequence must be counted as a single code point. */
TEST_CASE( "utf8_ncheck: String with 2-byte UTF-8 sequence", "[string/utf.c]" )
{
	/* contains U+03B1 (Greek small letter alpha): 11 bytes, 10 code points */
	static const char input[] = "i'm g\xce\xb1y,,,";
	error err;

	usize count = utf8_ncheck(input, sizeof(input) - 1, &err);

	REQUIRE( count == 10 );
	REQUIRE( errnum(&err) == 0 );
	REQUIRE( errmsg(&err) == nil );
}
/* A 3-byte sequence must be counted as a single code point. */
TEST_CASE( "utf8_ncheck: String with 3-byte UTF-8 sequence", "[string/utf.c]" )
{
	/* contains U+3042 (Hiragana letter A): 12 bytes, 10 code points */
	static const char input[] = "i'm g\xe3\x81\x82y,,,";
	error err;

	usize count = utf8_ncheck(input, sizeof(input) - 1, &err);

	REQUIRE( count == 10 );
	REQUIRE( errnum(&err) == 0 );
	REQUIRE( errmsg(&err) == nil );
}
/* A 4-byte sequence must be counted as a single code point. */
TEST_CASE( "utf8_ncheck: String with 4-byte UTF-8 sequence", "[string/utf.c]" )
{
	/* contains U+1F97A (an emoji): 14 bytes, 11 code points */
	static const char input[] = "i'm gay\xf0\x9f\xa5\xba,,,";
	error err;

	usize count = utf8_ncheck(input, sizeof(input) - 1, &err);

	REQUIRE( count == 11 );
	REQUIRE( errnum(&err) == 0 );
	REQUIRE( errmsg(&err) == nil );
}
/* The size limit must win over the NUL terminator when it comes first. */
TEST_CASE( "utf8_ncheck: Don't overread", "[string/utf.c]" )
{
	/* the string is 10 bytes long, but only 8 of them may be inspected */
	const usize limit = 8;
	error err;

	usize count = utf8_ncheck("i'm gay,,,", limit, &err);

	REQUIRE( count == 8 );
	REQUIRE( errnum(&err) == 0 );
	REQUIRE( errmsg(&err) == nil );
}
/* 0xff is not a valid first byte of any UTF-8 sequence. */
TEST_CASE( "utf8_ncheck: Error on malformed sequence start", "[string/utf.c]" )
{
	error err;
	utf8_ncheck("\xff", 1, &err);

	/* compare the yeeted message against the exact expected wording */
	string *want = nstr("Illegal UTF-8 sequence start byte: 0xff", nil);
	string *got = errmsg(&err);

	REQUIRE( errnum(&err) == EINVAL );
	REQUIRE( nstreq(want, got, nil) );

	errput(&err);
}
/* 0xce opens a 2-byte sequence, but 0xff is not a continuation byte. */
TEST_CASE( "utf8_ncheck: Error on wrong second byte", "[string/utf.c]" )
{
	error err;
	utf8_ncheck("\xce\xff", 2, &err);

	/* compare the yeeted message against the exact expected wording */
	string *want = nstr("Byte 2 in UTF-8 sequence invalid: 0xff", nil);
	string *got = errmsg(&err);

	REQUIRE( errnum(&err) == EINVAL );
	REQUIRE( nstreq(want, got, nil) );

	errput(&err);
}
/* 0xe3 opens a 3-byte sequence whose last byte is replaced with 0xff. */
TEST_CASE( "utf8_ncheck: Error on wrong third byte", "[string/utf.c]" )
{
	error err;
	utf8_ncheck("\xe3\x81\xff", 3, &err);

	/* compare the yeeted message against the exact expected wording */
	string *want = nstr("Byte 3 in UTF-8 sequence invalid: 0xff", nil);
	string *got = errmsg(&err);

	REQUIRE( errnum(&err) == EINVAL );
	REQUIRE( nstreq(want, got, nil) );

	errput(&err);
}
/* 0xf0 opens a 4-byte sequence whose last byte is replaced with 0xff. */
TEST_CASE( "utf8_ncheck: Error on wrong fourth byte", "[string/utf.c]" )
{
	error err;
	utf8_ncheck("\xf0\x9f\xa5\xff", 4, &err);

	/* compare the yeeted message against the exact expected wording */
	string *want = nstr("Byte 4 in UTF-8 sequence invalid: 0xff", nil);
	string *got = errmsg(&err);

	REQUIRE( errnum(&err) == EINVAL );
	REQUIRE( nstreq(want, got, nil) );

	errput(&err);
}
/* An overlong encoding (U+0020 padded out to 4 bytes) must be rejected. */
TEST_CASE( "utf8_ncheck: Error on non canonical encoding", "[string/utf.c]" )
{
	error err;
	utf8_ncheck("\xf0\x80\x80\xa0", 4, &err);

	/* compare the yeeted message against the exact expected wording */
	string *want = nstr("Non canonical UTF-8 encoding: 1 byte character stored in 4 bytes", nil);
	string *got = errmsg(&err);

	REQUIRE( errnum(&err) == EINVAL );
	REQUIRE( nstreq(want, got, nil) );

	errput(&err);
}
/*
* This file is part of libneo.
* Copyright (c) 2021 Fefie <owo@fef.moe>.
*
* libneo is non-violent software: you may only use, redistribute,
* and/or modify it under the terms of the CNPLv6+ as found in
* the LICENSE file in the source code root directory or at
* <https://git.pixie.town/thufie/CNPL>.
*
* libneo comes with ABSOLUTELY NO WARRANTY, to the extent
* permitted by applicable law. See the CNPLv6+ for details.
*/
Loading…
Cancel
Save