libc: refactor a couple of string routines

This is just a minor overhaul of several utility functions, in part because it kept bothering me and in part because i was bored.
2021-10-12 23:24:17 +02:00 · 2021-10-12 23:24:17 +02:00 · 904584ccc0
commit 904584ccc0
parent afbb3743d5
15 changed files with 195 additions and 69 deletions
--- a/.vscode/c_cpp_properties.template.json
+++ b/.vscode/c_cpp_properties.template.json
@ -7,7 +7,7 @@
                "${workspaceFolder}/build/include"
            ],
            "defines": [
-                "__KERNEL__",
+                "_KERNEL",
                "_GAY_SOURCE=202109L"
            ],
            "compilerArgs": [
--- a/arch/x86/include/arch/_types.h
+++ b/arch/x86/include/arch/_types.h
@ -31,19 +31,19 @@ typedef	unsigned long long	__uint64_t;
 #ifdef	__LP64__
 typedef	__int32_t	__clock_t;		/* clock()... */
 typedef	__int64_t	__critical_t;
-#if !defined(__KERNEL__) && !defined(_FREESTANDING)
+#if !defined(_KERNEL) && !defined(_FREESTANDING)
 typedef	double		__double_t;
 typedef	float		__float_t;
-#endif /* not __KERNEL__ or _FREESTANDING */
+#endif /* not _KERNEL or _FREESTANDING */
 typedef	__int64_t	__intfptr_t;
 typedef	__int64_t	__intptr_t;
 #else /* not __LP64__ */
 typedef	unsigned long	__clock_t;
 typedef	__int32_t	__critical_t;
-#if !defined(__KERNEL__) && !defined(_FREESTANDING)
+#if !defined(_KERNEL) && !defined(_FREESTANDING)
 typedef	long double	__double_t;
 typedef	long double	__float_t;
-#endif /* not __KERNEL__ or _FREESTANDING */
+#endif /* not _KERNEL or _FREESTANDING */
 typedef	__int32_t	__intfptr_t;
 typedef	__int32_t	__intptr_t;
 #endif /* __LP64__ */
--- a/include/gay/mm.h
+++ b/include/gay/mm.h
@ -11,6 +11,8 @@
 * type of compiler warning if they are accidentally mixed up.
 */

+#ifdef _KERNEL
+
 #include <arch/page.h>

 #include <gay/types.h>
@ -118,6 +120,8 @@ int mem_init(uintptr_t start, uintptr_t end);
 */
 int kmalloc_init(uintptr_t start, uintptr_t end);

+#endif /* _KERNEL */
+
 /*
 * This file is part of GayBSD.
 * Copyright (c) 2021 fef <owo@fef.moe>.
--- a/include/gay/util.h
+++ b/include/gay/util.h
@ -10,11 +10,11 @@
 * @param a The array
 * @returns The number of elements
 */
-#define ARRAY_SIZE(a) ( sizeof(a) / sizeof(a[0]) )
+#define ARRAY_SIZE(a) ( sizeof(a) / sizeof((a)[0]) )

 #define abs(x) ({		\
 	typeof(x) __x = (x);	\
-	__x < 0 ? -__x : __x;	\
+	__x < 0 ? -__x : +__x;	\
 })

 #define max(a, b) ({		\
--- a/include/stdlib.h
+++ b/include/stdlib.h
@ -0,0 +1,34 @@
+/* See the end of this file for copyright and license terms. */
+
+#pragma once
+
+#include <gay/cdefs.h>
+#include <gay/types.h>
+
+#ifdef _KERNEL /* kernel build: map the libc names onto kmalloc()/kfree() */
+#include <gay/mm.h>
+#define malloc(size) kmalloc(size, MM_KERNEL)
+#define free(ptr) kfree(ptr)
+#else /* not _KERNEL */
+/*
+ * i still have to do some include flags fuckery to get this working, for now
+ * these are just implemented in kernel/mm/kmalloc.c as __weak functions because
+ * this libc is *always* linked against the kernel atm.
+ */
+extern void *malloc(usize size) __malloc_like __alloc_size(1);
+extern void free(void *ptr);
+#endif /* _KERNEL */
+
+/*
+ * This file is part of GayBSD.
+ * Copyright (c) 2021 fef <owo@fef.moe>.
+ *
+ * GayBSD is nonviolent software: you may only use, redistribute, and/or
+ * modify it under the terms of the Cooperative Nonviolent Public License
+ * (CNPL) as found in the LICENSE file in the source code root directory
+ * or at <https://git.pixie.town/thufie/npl-builder>; either version 7
+ * of the license, or (at your option) any later version.
+ *
+ * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
+ * permitted by applicable law.  See the CNPL for details.
+ */
--- a/kernel/kprintf.c
+++ b/kernel/kprintf.c
@ -12,8 +12,8 @@
 * I hope i never have to touch this again.
 */

-#include <alloca.h> /* clang */
-#include <stdarg.h> /* clang */
+#include <alloca.h>
+#include <stdarg.h>

 #include <string.h>

--- a/kernel/mm/kmalloc.c
+++ b/kernel/mm/kmalloc.c
@ -2,6 +2,8 @@

 #include <arch/page.h>

+#include <gay/arith.h>
+#include <gay/cdefs.h>
 #include <gay/clist.h>
 #include <gay/config.h>
 #include <gay/errno.h>
@ -205,8 +207,29 @@ void kfree(void *ptr)
 	blk_try_merge(blk);
 }

+/*
+ * These wrappers are used for linking libc against the kernel itself.
+ * This is a "temporary" hack because i haven't figured out the whole C flags
+ * thingy for properly producing two versions of libc (one static one for the
+ * kernel and a shared one for user space).
+ *
+ * malloc() hands the request to kmalloc() with the MM_KERNEL flag and returns
+ * its result unchanged, free() hands the pointer to kfree().
+ * Both are __weak, presumably so another definition can override them at link
+ * time -- TODO confirm against the __weak definition in <gay/cdefs.h>.
+ */
+
+__weak void *malloc(usize size)
+{
+	return kmalloc(size, MM_KERNEL);
+}
+
+__weak void free(void *ptr)
+{
+	kfree(ptr);
+}
+
 static inline struct memblk *blk_create(usize num_pages)
 {
+	usize blksize;
+	if (mul_overflow(&blksize, num_pages, PAGE_SIZE))
+		return NULL;
+
 	/*
 	 * heap_end points to the first address that is not part of the heap
 	 * anymore, so that's where the new block starts when we add pages
@ -215,7 +238,7 @@ static inline struct memblk *blk_create(usize num_pages)
 	if (grow_heap(num_pages) != num_pages)
 		return NULL; /* OOM :( */

-	blk_set_size(blk, num_pages * PAGE_SIZE - OVERHEAD);
+	blk_set_size(blk, blksize - OVERHEAD);
 	blk_clear_alloc(blk);
 	blk_set_border_end(blk);

@ -302,7 +325,8 @@ static struct memblk *blk_slice(struct memblk *blk, usize slice_size)
 	 * than the full block size.
 	 */
 	usize rest_size = blk_get_size(blk) - slice_size - OVERHEAD;
-	if (rest_size < MIN_SIZE || rest_size + OVERHEAD < rest_size) {
+	bool carry = sub_underflow(&rest_size, blk_get_size(blk), slice_size + OVERHEAD);
+	if (rest_size < MIN_SIZE || carry) {
 		blk_set_alloc(blk);
 		return blk;
 	}
--- a/lib/c/include/string.h
+++ b/lib/c/include/string.h
@ -333,12 +333,12 @@ __pure char *strchrnul(const char *s, int c);
 char *strsep(char **stringp, const char *delim);
 #endif /* _GAY_SOURCE >= 202109L || __BSD_VISIBLE */

-#ifndef __KERNEL__ /* we *really* don't want this anywhere in kernel code */
+#ifndef _KERNEL /* we *really* don't want this anywhere in kernel code */
 	char *strtok(char *__restrict s, const char *__restrict tok);
 #	if _POSIX_C_SOURCE >= 199506L
 		char *strtok_r(char *s, const char *delim, char **last);
 #	endif /* _POSIX_C_SOURCE >= 199506L */
-#endif /* not __KERNEL__ */
+#endif /* not _KERNEL */

 /*
 * This file is part of GayBSD.
--- a/lib/c/string/strcpy.c
+++ b/lib/c/string/strcpy.c
@ -1,27 +1,93 @@
 /* See the end of this file for copyright and license terms. */

+#include <gay/types.h>
+
 #include <string.h>

+/*
+ * The two and four byte routines use the same trick as strlen(),
+ * see the respective file for details and where it's from.
+ *
+ * Both copy the string including its terminator and work in three steps:
+ * single bytes until `to` is aligned, whole words while the source word
+ * contains no zero byte, and single bytes again for the rest.
+ * Only `to` is tested for alignment; strcpy() below only picks a routine
+ * if `from` has the same offset, so the word reads end up aligned too.
+ */
+
+static inline void fourbyte_strcpy(char *to, const char *from)
+{
+	/* make sure we are aligned first */
+#pragma unroll(3)
+	while ((uintptr_t)to % 4) {
+		if ((*to++ = *from++) == '\0')
+			return; /* terminator copied while aligning, nothing left */
+	}
+
+	/*
+	 * copy in chunks of 4 bytes until there is a terminator
+	 * (the word that holds the terminator is not copied here, the byte
+	 * loop at the end takes care of it)
+	 */
+	u32 *to32 = (u32 *)to;
+	const u32 *from32 = (const u32 *)from;
+	while ( ((*from32 - 0x01010101) & (~*from32 & 0x80808080)) == 0 )
+		*to32++ = *from32++;
+
+	to = (char *)to32;
+	from = (const char *)from32;
+
+	/* copy the remaining bytes (can only be within 1 and 4) */
+#pragma unroll(4)
+	while ((*to++ = *from++) != '\0');
+		/* nothing */
+}
+
+static inline void twobyte_strcpy(char *to, const char *from)
+{
+	if ((uintptr_t)to % 2) { /* odd address, one byte gets us aligned */
+		if ((*to++ = *from++) == '\0')
+			return; /* that byte was the terminator */
+	}
+
+	u16 *to16 = (u16 *)to;
+	const u16 *from16 = (const u16 *)from;
+	while ( ((*from16 - 0x0101) & (~*from16 & 0x8080)) == 0 )
+		*to16++ = *from16++; /* neither byte is zero, copy both at once */
+
+	to = (char *)to16;
+	from = (const char *)from16;
+
+	/*
+	 * copy the remaining bytes (can only be within 1 and 2)
+	 * this includes the terminator, which the loop above never copies
+	 */
+#pragma unroll(2)
+	while ((*to++ = *from++) != '\0');
+		/* nothing */
+}
+
 #ifdef WEAK_STRCPY
-__weak_reference(__strcpy, strcpy);
+#define STRCPY_NAME __strcpy
+#else
+#define STRCPY_NAME strcpy
 #endif

-char *
-#ifdef WEAK_STRCPY
-__strcpy
-#else
-strcpy
-#endif
-(char *restrict to, const char *restrict from)
+char *STRCPY_NAME(char *restrict to, const char *restrict from)
 {
 	char *save = to;

-	for (; (*to = *from); ++from, ++to)
-		/* nothing */;
+	/*
+	 * Check if the pointers can both be aligned to 2 or 4 bytes and copy
+	 * in whole blocks of 2 or 4 bytes respectively if that's the case.
+	 * This optimization is utterly useless because i'm almost certainly
+	 * gonna write (or "import", rather) dedicated assembly routines but
+	 * hey it can't hurt to introduce new sources for bugs right?
+	 *
+	 * All three branches copy up to and including the terminator; the
+	 * last one is the plain byte by byte fallback for pointers whose
+	 * offsets differ even modulo 2.  That fallback advances `to`, which
+	 * is why the original destination is kept in `save` for the return
+	 * value.  STRCPY_NAME is __strcpy if WEAK_STRCPY is defined and
+	 * strcpy otherwise.
+	 */
+	if (((uintptr_t)to % 4) == ((uintptr_t)from % 4)) {
+		fourbyte_strcpy(to, from);
+	} else if (((uintptr_t)to % 2) == ((uintptr_t)from % 2)) {
+		twobyte_strcpy(to, from);
+	} else {
+		while ((*to++ = *from++) != '\0');
+			/* nothing */
+	}

 	return save;
 }

+#ifdef WEAK_STRCPY
+#include <gay/cdefs.h>
+__weak __alias(__strcpy) char *strcpy(char *restrict, const char *restrict);
+#endif /* WEAK_STRCPY */
+
 /*
 * This file is part of GayBSD.
 * Copyright (c) 2021 fef <owo@fef.moe>.
--- a/lib/c/string/strdup.c
+++ b/lib/c/string/strdup.c
@ -1,7 +1,8 @@
 /* See the end of this file for copyright and license terms. */

-#include <gay/mm.h>
+#include <gay/types.h>

+#include <stdlib.h>
 #include <string.h>

 char *strdup(const char *str)
@ -10,7 +11,7 @@ char *strdup(const char *str)
 	char *copy;

 	len = strlen(str) + 1;
-	if ((copy = kmalloc(len, MM_KERNEL)) == NULL)
+	if ((copy = malloc(len)) == NULL)
 		return NULL;

 	memcpy(copy, str, len);
--- a/lib/c/string/strlen.c
+++ b/lib/c/string/strlen.c
@ -12,7 +12,7 @@
 *
 *	((x - 0x01....01) & ~x & 0x80....80)
 *
- * would evaluate to a non-zero value iff any of the bytes in the
+ * would evaluate to a non-zero value if any of the bytes in the
 * original word is zero.
 *
 * The algorithm above is found on "Hacker's Delight" by
@ -34,23 +34,13 @@ static const unsigned long mask80 = 0x8080808080808080;
 #error "Unsupported word size"
 #endif

-#define	LONGPTR_MASK (sizeof(long) - 1)
-
-/*
- * Helper macro to return string length if we caught the zero
- * byte.
- */
-#define testbyte(x)				\
-	do {					\
-		if (p[x] == '\0')		\
-		    return (p - str + x);	\
-	} while (0)
+#define	LONGPTR_MASK (sizeof(unsigned long) - 1)

 usize strlen(const char *str)
 {
 	const char *p;
 	const unsigned long *lp;
-	long va, vb;
+	unsigned long va, vb;

 	/*
 	 * Before trying the hard (unaligned byte-by-byte access) way
@ -63,38 +53,41 @@ usize strlen(const char *str)
 	 * boundaries is integral multiple of word size.
 	 */
 	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
-	va = (*lp - mask01); /* NOLINT */
-	vb = ((~*lp) & mask80); /* NOLINT */
+	va = *lp - mask01;
+	vb = (~*lp) & mask80;
 	lp++;
 	if (va & vb) {
 		/* Check if we have \0 in the first part */
 		for (p = str; p < (const char *)lp; p++) {
 			if (*p == '\0')
-				return (p - str);
+				return p - str;
 		}
 	}

 	/* Scan the rest of the string using word sized operation */
 	for (; ; lp++) {
-		va = (*lp - mask01); /* NOLINT */
-		vb = ((~*lp) & mask80); /* NOLINT */
+		va = *lp - mask01;
+		vb = (~*lp) & mask80;
 		if (va & vb) {
-			p = (const char *)(lp);
-			testbyte(0);
-			testbyte(1);
-			testbyte(2);
-			testbyte(3);
-#if (LONG_BIT >= 64)
-			testbyte(4);
-			testbyte(5);
-			testbyte(6);
-			testbyte(7);
-#endif
+			/*
+			 * The original version from FreeBSD uses 4 (or 8) if
+			 * branches querying every byte sequentially here.
+			 * I was bored and rewrote it branchless, but that
+			 * probably doesn't help in terms of performance
+			 * because clang's optimizer is better anyway.
+			 */
+			p = (const char *)lp;
+			unsigned long l = *lp;
+			unsigned long add = 1;
+#pragma clang loop unroll(full)
+			for (int i = 0; i < sizeof(l); i++) {
+				add &= (l & 0xffu) != 0;
+				p += add;
+				l >>= 8;
+			}
+			return p - str;
 		}
 	}
-
-	/* NOTREACHED */
-	return 0;
 }

 /*
--- a/lib/c/string/strndup.c
+++ b/lib/c/string/strndup.c
@ -1,9 +1,8 @@
 /* See the end of this file for copyright and license terms. */

-#include <gay/mm.h>
 #include <gay/types.h>

-#define _POSIX_C_SOURCE 200809L
+#include <stdlib.h>
 #include <string.h>

 char *strndup(const char *str, usize maxlen)
@ -12,7 +11,7 @@ char *strndup(const char *str, usize maxlen)
 	usize len;

 	len = strnlen(str, maxlen);
-	copy = kmalloc(len + 1, MM_KERNEL);
+	copy = malloc(len + 1);
 	if (copy != NULL) {
 		memcpy(copy, str, len);
 		copy[len] = '\0';
--- a/lib/c/string/strtok.c
+++ b/lib/c/string/strtok.c
@ -2,7 +2,7 @@

 #include <gay/cdefs.h>

-#ifndef __KERNEL__
+#ifndef _KERNEL
 #include <string.h>

 char *__strtok_r(char *s, const char *delim, char **last)
@ -59,7 +59,7 @@ char *strtok(char *s, const char *delim)
 	return __strtok_r(s, delim, &last);
 }

-#endif /* not __KERNEL__ */
+#endif /* not _KERNEL */

 /*
 * This file is part of GayBSD.
--- a/lib/c/string/wcsdup.c
+++ b/lib/c/string/wcsdup.c
@ -1,18 +1,23 @@
 /* See the end of this file for copyright and license terms. */

-#include <gay/mm.h>
+#include <gay/arith.h>
 #include <gay/types.h>

+#include <stdlib.h>
 #include <wchar.h>

 wchar_t *wcsdup(const wchar_t *s)
 {
 	wchar_t *copy;
-	size_t len;
+	/*
+	 * if this would overflow we'd have an entirely different kind of problem
+	 *
+	 * len is a count of wide characters including the terminator; the byte
+	 * count for malloc() is computed separately below.
+	 * NOTE(review): mul_overflow() presumably stores len * sizeof(*copy) in
+	 * size and reports whether that wrapped -- confirm in <gay/arith.h>.
+	 */
+	usize len = wcslen(s) + 1;

-	len = wcslen(s) + 1;
-	if ((copy = kmalloc(len * sizeof(wchar_t), MM_KERNEL)) == NULL)
-		return (NULL);
+	usize size;
+	if (mul_overflow(&size, len, sizeof(*copy)))
+		return NULL;
+
+	if ((copy = malloc(size)) == NULL)
+		return NULL; /* allocation failed, nothing to clean up */
 	return wmemcpy(copy, s, len);
 }

--- a/lib/c/string/wcslcpy.c
+++ b/lib/c/string/wcslcpy.c
@ -18,7 +18,7 @@ usize wcslcpy(wchar_t *dst, const wchar_t *src, usize siz)
 	/* Copy as many bytes as will fit */
 	if (n != 0 && --n != 0) {
 		do {
-			if ((*d++ = *s++) == 0)
+			if ((*d++ = *s++) == L'\0')
 				break;
 		} while (--n != 0);
 	}
@ -26,7 +26,7 @@ usize wcslcpy(wchar_t *dst, const wchar_t *src, usize siz)
 	/* Not enough room in dst, add NUL and traverse rest of src */
 	if (n == 0) {
 		if (siz != 0)
-			*d = '\0';		/* NUL-terminate dst */
+			*d = L'\0';		/* NUL-terminate dst */
 		while (*s++)
 			/* nothing */;
 	}