test: add tests for utf8_chrsize

utf: fix name ambiguity of utf8_chrsize
5 changed files with 92 additions and 18 deletions
--- a/src/string/utf.c
+++ b/src/string/utf.c
@@ -40,7 +40,7 @@ usize utf8_strlen(const char *restrict s)
 	return len;
 }

-usize utf8_nchr_size(nchar c, error *err)
+usize utf8_chrsize(nchar c, error *err)
 {
 	usize ret;

@@ -76,7 +76,7 @@ usize utf8_from_nchr(char *restrict dest, nchar c, error *err)
 {
 	static const u8 prefixes[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };

-	usize utf8_size = utf8_nchr_size(c, err);
+	usize utf8_size = utf8_chrsize(c, err);
 	catch(err) {
 		*dest = '\0';
 		return 0;
@@ -200,7 +200,7 @@ usize utf8_to_nchr(nchar *dest, const char *restrict _utf8chr, error *err)
 		if ((eflags & 0x01) != 0) {
 			yeet(err, EINVAL,
 			     "Non canonical UTF-8 encoding: %zu byte character stored in %u bytes",
-			     utf8_nchr_size(c, nil), len);
+			     utf8_chrsize(c, nil), len);
 		} else if ((eflags & 0x02) != 0) {
 			yeet(err, EINVAL, "Illegal UTF-8 sequence start byte: 0x%02x", utf8chr[0]);
 		} else if ((eflags & 0x0c) != 0) {
--- a/test/string/utf/utf.cmake
+++ b/test/string/utf/utf.cmake
@@ -1,6 +1,7 @@
 # See the end of this file for copyright and license terms.

 target_sources(neo_test PRIVATE
+    string/utf/utf8_chrsize.cpp
    string/utf/utf8_from_nchr.cpp
    string/utf/utf8_to_nchr.cpp
 )
--- a/test/string/utf/utf8_chrsize.cpp
+++ b/test/string/utf/utf8_chrsize.cpp
@@ -0,0 +1,73 @@
+/** See the end of this file for copyright and license terms. */
+
+#include <catch2/catch.hpp>
+#include <errno.h>
+
+#include <neo.h>
+#include <neo/utf.h>
+
+TEST_CASE( "utf8_chrsize: Identify 1 byte character", "[string/utf.c]" )
+{
+	error err;
+	usize size = utf8_chrsize(',', &err);
+
+	REQUIRE( size == 1 );
+	REQUIRE( errnum(&err) == 0 );
+}
+
+TEST_CASE( "utf8_chrsize: Identify 2 byte character", "[string/utf.c]" )
+{
+	error err;
+	/* U+03B1 Greek Small Letter Alpha */
+	usize size = utf8_chrsize(0x03b1, &err);
+
+	REQUIRE( size == 2 );
+	REQUIRE( errnum(&err) == 0 );
+}
+
+TEST_CASE( "utf8_chrsize: Identify 3 byte character", "[string/utf.c]" )
+{
+	error err;
+	/* U+3042 Hiragana Letter A */
+	usize size = utf8_chrsize(0x3042, &err);
+
+	REQUIRE( size == 3 );
+	REQUIRE( errnum(&err) == 0 );
+}
+
+TEST_CASE( "utf8_chrsize: Identify 4 byte character", "[string/utf.c]" )
+{
+	error err;
+	/* U+1F97A Pleading Face, a.k.a. The Bottom Emoji(TM) */
+	usize size = utf8_chrsize(0x01f97a, &err);
+
+	REQUIRE( size == 4 );
+	REQUIRE( errnum(&err) == 0 );
+}
+
+TEST_CASE( "utf8_chrsize: Error if out of Unicode range", "[string/utf.c]" )
+{
+	error err;
+	/* Unicode range is 0x00~0x10ffff */
+	usize size = utf8_chrsize(0x110000, &err);
+
+	string *expected = nstr("Character code not within Unicode range", nil);
+	string *actual = errmsg(&err);
+
+	REQUIRE( size == 0 );
+	REQUIRE( errnum(&err) == EINVAL );
+	REQUIRE( nstreq(expected, actual, nil) );
+}
+
+/*
+ * This file is part of libneo.
+ * Copyright (c) 2021 Fefie <owo@fef.moe>.
+ *
+ * libneo is non-violent software: you may only use, redistribute,
+ * and/or modify it under the terms of the CNPLv6+ as found in
+ * the LICENSE file in the source code root directory or at
+ * <https://git.pixie.town/thufie/CNPL>.
+ *
+ * libneo comes with ABSOLUTELY NO WARRANTY, to the extent
+ * permitted by applicable law.  See the CNPLv6+ for details.
+ */
--- a/test/string/utf/utf8_from_nchr.cpp
+++ b/test/string/utf/utf8_from_nchr.cpp
@@ -6,7 +6,7 @@
 #include <neo.h>
 #include <neo/utf.h>

-TEST_CASE( "Encode 1-byte character", "[utf8_from_nchr]" )
+TEST_CASE( "utf8_from_nchr: Encode 1-byte character", "[string/utf.c]" )
 {
 	char buf[5] = { '\xff', '\xff', '\xff', '\xff', '\xff' };
 	error err;
@@ -21,7 +21,7 @@ TEST_CASE( "Encode 1-byte character", "[utf8_from_nchr]" )
 	REQUIRE( errnum(&err) == 0 );
 }

-TEST_CASE( "Encode 2-byte character", "[utf8_from_nchr]" )
+TEST_CASE( "utf8_from_nchr: Encode 2-byte character", "[string/utf.c]" )
 {
 	char buf[5] = { '\xff', '\xff', '\xff', '\xff', '\xff' };
 	error err;
@@ -37,7 +37,7 @@ TEST_CASE( "Encode 2-byte character", "[utf8_from_nchr]" )
 	REQUIRE( errnum(&err) == 0 );
 }

-TEST_CASE( "Encode 3-byte character", "[utf8_from_nchr]" )
+TEST_CASE( "utf8_from_nchr: Encode 3-byte character", "[string/utf.c]" )
 {
 	char buf[5] = { '\xff', '\xff', '\xff', '\xff', '\xff' };
 	error err;
@@ -53,7 +53,7 @@ TEST_CASE( "Encode 3-byte character", "[utf8_from_nchr]" )
 	REQUIRE( errnum(&err) == 0 );
 }

-TEST_CASE( "Encode 4-byte character", "[utf8_from_nchr]" )
+TEST_CASE( "utf8_from_nchr: Encode 4-byte character", "[string/utf.c]" )
 {
 	char buf[5] = { '\xff', '\xff', '\xff', '\xff', '\xff' };
 	error err;
@@ -69,7 +69,7 @@ TEST_CASE( "Encode 4-byte character", "[utf8_from_nchr]" )
 	REQUIRE( errnum(&err) == 0 );
 }

-TEST_CASE( "Error if out of Unicode range", "[utf8_from_nchr]" )
+TEST_CASE( "utf8_from_nchr: Error if out of Unicode range", "[string/utf.c]" )
 {
 	char buf[5] = { '\xff', '\xff', '\xff', '\xff', '\xff' };
 	error err;
@@ -87,7 +87,7 @@ TEST_CASE( "Error if out of Unicode range", "[utf8_from_nchr]" )
 	REQUIRE( size == 0 );
 	REQUIRE( errnum(&err) == EINVAL );
 	REQUIRE( nstreq(expected, actual, nil) );
-	printf("%s\n", errmsg(&err)->_data);
+	errput(&err);
 }

 /*
--- a/test/string/utf/utf8_to_nchr.cpp
+++ b/test/string/utf/utf8_to_nchr.cpp
@@ -6,7 +6,7 @@
 #include <neo.h>
 #include <neo/utf.h>

-TEST_CASE( "Decode 1-byte character sequence", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Decode 1-byte character sequence", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -18,7 +18,7 @@ TEST_CASE( "Decode 1-byte character sequence", "[utf8_to_nchr]" )
 	REQUIRE( errmsg(&err) == nil );
 }

-TEST_CASE( "Decode 2-byte character sequence", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Decode 2-byte character sequence", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -31,7 +31,7 @@ TEST_CASE( "Decode 2-byte character sequence", "[utf8_to_nchr]" )
 	REQUIRE( errmsg(&err) == nil );
 }

-TEST_CASE( "Decode 3-byte character sequence", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Decode 3-byte character sequence", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -44,7 +44,7 @@ TEST_CASE( "Decode 3-byte character sequence", "[utf8_to_nchr]" )
 	REQUIRE( errmsg(&err) == nil );
 }

-TEST_CASE( "Decode 4-byte character sequence", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Decode 4-byte character sequence", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -57,7 +57,7 @@ TEST_CASE( "Decode 4-byte character sequence", "[utf8_to_nchr]" )
 	REQUIRE( errmsg(&err) == nil );
 }

-TEST_CASE( "Error on malformed sequence start", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Error on malformed sequence start", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -72,7 +72,7 @@ TEST_CASE( "Error on malformed sequence start", "[utf8_to_nchr]" )
 	errput(&err);
 }

-TEST_CASE( "Error on wrong second byte", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Error on wrong second byte", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -87,7 +87,7 @@ TEST_CASE( "Error on wrong second byte", "[utf8_to_nchr]" )
 	errput(&err);
 }

-TEST_CASE( "Error on wrong third byte", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Error on wrong third byte", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -102,7 +102,7 @@ TEST_CASE( "Error on wrong third byte", "[utf8_to_nchr]" )
 	errput(&err);
 }

-TEST_CASE( "Error on wrong fourth byte", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Error on wrong fourth byte", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
@@ -117,7 +117,7 @@ TEST_CASE( "Error on wrong fourth byte", "[utf8_to_nchr]" )
 	errput(&err);
 }

-TEST_CASE( "Error on non canonical encoding", "[utf8_to_nchr]" )
+TEST_CASE( "utf8_to_nchr: Error on non canonical encoding", "[string/utf.c]" )
 {
 	error err;
 	nchar c;
Author	SHA1	Message	Date
anna	f41fc5ad44	test: add tests for utf8_chrsize	3 years ago
anna	f27d00a4a2	utf: fix name ambiguity of utf8_chrsize	3 years ago