libc: refactor a couple of string routines
This is just a minor overhaul of several utility functions, in part because it kept bothering me and in part because I was bored.
This commit is contained in:
parent
afbb3743d5
commit
904584ccc0
15 changed files with 195 additions and 69 deletions
2
.vscode/c_cpp_properties.template.json
vendored
2
.vscode/c_cpp_properties.template.json
vendored
|
@ -7,7 +7,7 @@
|
|||
"${workspaceFolder}/build/include"
|
||||
],
|
||||
"defines": [
|
||||
"__KERNEL__",
|
||||
"_KERNEL",
|
||||
"_GAY_SOURCE=202109L"
|
||||
],
|
||||
"compilerArgs": [
|
||||
|
|
|
@ -31,19 +31,19 @@ typedef unsigned long long __uint64_t;
|
|||
#ifdef __LP64__
|
||||
typedef __int32_t __clock_t; /* clock()... */
|
||||
typedef __int64_t __critical_t;
|
||||
#if !defined(__KERNEL__) && !defined(_FREESTANDING)
|
||||
#if !defined(_KERNEL) && !defined(_FREESTANDING)
|
||||
typedef double __double_t;
|
||||
typedef float __float_t;
|
||||
#endif /* not __KERNEL__ or _FREESTANDING */
|
||||
#endif /* not _KERNEL or _FREESTANDING */
|
||||
typedef __int64_t __intfptr_t;
|
||||
typedef __int64_t __intptr_t;
|
||||
#else /* not __LP64__ */
|
||||
typedef unsigned long __clock_t;
|
||||
typedef __int32_t __critical_t;
|
||||
#if !defined(__KERNEL__) && !defined(_FREESTANDING)
|
||||
#if !defined(_KERNEL) && !defined(_FREESTANDING)
|
||||
typedef long double __double_t;
|
||||
typedef long double __float_t;
|
||||
#endif /* not __KERNEL__ or _FREESTANDING */
|
||||
#endif /* not _KERNEL or _FREESTANDING */
|
||||
typedef __int32_t __intfptr_t;
|
||||
typedef __int32_t __intptr_t;
|
||||
#endif /* __LP64__ */
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
* type of compiler warning if they are accidentally mixed up.
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#include <arch/page.h>
|
||||
|
||||
#include <gay/types.h>
|
||||
|
@ -118,6 +120,8 @@ int mem_init(uintptr_t start, uintptr_t end);
|
|||
*/
|
||||
int kmalloc_init(uintptr_t start, uintptr_t end);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>.
|
||||
|
|
|
@ -10,11 +10,11 @@
|
|||
* @param a The array
|
||||
* @returns The number of elements
|
||||
*/
|
||||
#define ARRAY_SIZE(a) ( sizeof(a) / sizeof(a[0]) )
|
||||
#define ARRAY_SIZE(a) ( sizeof(a) / sizeof((a)[0]) )
|
||||
|
||||
#define abs(x) ({ \
|
||||
typeof(x) __x = (x); \
|
||||
__x < 0 ? -__x : __x; \
|
||||
__x < 0 ? -__x : +__x; \
|
||||
})
|
||||
|
||||
#define max(a, b) ({ \
|
||||
|
|
34
include/stdlib.h
Normal file
34
include/stdlib.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gay/cdefs.h>
|
||||
#include <gay/types.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <gay/mm.h>
|
||||
#define malloc(size) kmalloc(size, MM_KERNEL)
|
||||
/* Kernel builds: route free() to kfree(), mirroring the malloc() -> kmalloc() macro. */
#define free(ptr) kfree(ptr)
|
||||
#else
|
||||
/*
|
||||
* i still have to do some include flags fuckery to get this working, for now
|
||||
* these are just implemented in kernel/mm/kmalloc.c as __weak functions because
|
||||
* this libc is *always* linked against the kernel atm.
|
||||
*/
|
||||
extern void *malloc(usize size) __malloc_like __alloc_size(1);
|
||||
extern void free(void *ptr);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>.
|
||||
*
|
||||
* GayBSD is nonviolent software: you may only use, redistribute, and/or
|
||||
* modify it under the terms of the Cooperative Nonviolent Public License
|
||||
* (CNPL) as found in the LICENSE file in the source code root directory
|
||||
* or at <https://git.pixie.town/thufie/npl-builder>; either version 7
|
||||
* of the license, or (at your option) any later version.
|
||||
*
|
||||
* GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
|
||||
* permitted by applicable law. See the CNPL for details.
|
||||
*/
|
|
@ -12,8 +12,8 @@
|
|||
* I hope i never have to touch this again.
|
||||
*/
|
||||
|
||||
#include <alloca.h> /* clang */
|
||||
#include <stdarg.h> /* clang */
|
||||
#include <alloca.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include <arch/page.h>
|
||||
|
||||
#include <gay/arith.h>
|
||||
#include <gay/cdefs.h>
|
||||
#include <gay/clist.h>
|
||||
#include <gay/config.h>
|
||||
#include <gay/errno.h>
|
||||
|
@ -205,8 +207,29 @@ void kfree(void *ptr)
|
|||
blk_try_merge(blk);
|
||||
}
|
||||
|
||||
/*
|
||||
* These wrappers are used for linking libc against the kernel itself.
|
||||
* This is a "temporary" hack because i haven't figured out the whole C flags
|
||||
* thingy for properly producing two versions of libc (one static one for the
|
||||
* kernel and a shared one for user space).
|
||||
*/
|
||||
|
||||
/*
 * libc-facing malloc(): forwards the request to kmalloc(), passing
 * MM_KERNEL as the second argument, and returns kmalloc()'s result
 * unchanged.
 *
 * NOTE(review): __weak presumably emits a weak symbol so that another
 * definition of malloc() can take precedence -- confirm in gay/cdefs.h.
 */
__weak void *malloc(usize size)
|
||||
{
|
||||
return kmalloc(size, MM_KERNEL);
|
||||
}
|
||||
|
||||
/*
 * libc-facing free(): hands the pointer straight to kfree().
 *
 * NOTE(review): __weak presumably emits a weak symbol so that another
 * definition of free() can take precedence -- confirm in gay/cdefs.h.
 */
__weak void free(void *ptr)
|
||||
{
|
||||
kfree(ptr);
|
||||
}
|
||||
|
||||
static inline struct memblk *blk_create(usize num_pages)
|
||||
{
|
||||
usize blksize;
|
||||
if (mul_overflow(&blksize, num_pages, PAGE_SIZE))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* heap_end points to the first address that is not part of the heap
|
||||
* anymore, so that's where the new block starts when we add pages
|
||||
|
@ -215,7 +238,7 @@ static inline struct memblk *blk_create(usize num_pages)
|
|||
if (grow_heap(num_pages) != num_pages)
|
||||
return NULL; /* OOM :( */
|
||||
|
||||
blk_set_size(blk, num_pages * PAGE_SIZE - OVERHEAD);
|
||||
blk_set_size(blk, blksize - OVERHEAD);
|
||||
blk_clear_alloc(blk);
|
||||
blk_set_border_end(blk);
|
||||
|
||||
|
@ -302,7 +325,8 @@ static struct memblk *blk_slice(struct memblk *blk, usize slice_size)
|
|||
* than the full block size.
|
||||
*/
|
||||
usize rest_size = blk_get_size(blk) - slice_size - OVERHEAD;
|
||||
if (rest_size < MIN_SIZE || rest_size + OVERHEAD < rest_size) {
|
||||
bool carry = sub_underflow(&rest_size, blk_get_size(blk), slice_size + OVERHEAD);
|
||||
if (rest_size < MIN_SIZE || carry) {
|
||||
blk_set_alloc(blk);
|
||||
return blk;
|
||||
}
|
||||
|
|
|
@ -333,12 +333,12 @@ __pure char *strchrnul(const char *s, int c);
|
|||
char *strsep(char **stringp, const char *delim);
|
||||
#endif /* _GAY_SOURCE >= 202109L || __BSD_VISIBLE */
|
||||
|
||||
#ifndef __KERNEL__ /* we *really* don't want this anywhere in kernel code */
|
||||
#ifndef _KERNEL /* we *really* don't want this anywhere in kernel code */
|
||||
char *strtok(char *__restrict s, const char *__restrict tok);
|
||||
# if _POSIX_C_SOURCE >= 199506L
|
||||
char *strtok_r(char *s, const char *delim, char **last);
|
||||
# endif /* _POSIX_C_SOURCE >= 199506L */
|
||||
#endif /* not __KERNEL__ */
|
||||
#endif /* not _KERNEL */
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
|
|
|
@ -1,27 +1,93 @@
|
|||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <gay/types.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* The two and four byte routines use the same trick as strlen(),
|
||||
* see the respective file for details and where it's from.
|
||||
*/
|
||||
|
||||
/*
 * Copy the NUL-terminated string `from` to `to`, moving whole 32-bit words
 * once `to` has been brought to a 4-byte boundary.
 *
 * The strcpy implementation further down in this file takes this path when
 * `to` and `from` share the same offset modulo 4, so aligning `to` is
 * expected to align `from` as well.
 *
 * NOTE(review): the word loop accesses the char buffers through u32 lvalues
 * and may load bytes that lie after the terminator within the final word;
 * confirm the build tolerates this (e.g. -fno-strict-aliasing).
 */
static inline void fourbyte_strcpy(char *to, const char *from)
|
||||
{
|
||||
/* make sure we are aligned first */
|
||||
#pragma unroll(3)
|
||||
/* byte-wise copy; returns early if the terminator is hit before alignment */
while ((uintptr_t)to % 4) {
|
||||
if ((*to++ = *from++) == '\0')
|
||||
return;
|
||||
}
|
||||
|
||||
/* copy in chunks of 4 bytes until there is a terminator */
|
||||
u32 *to32 = (u32 *)to;
|
||||
const u32 *from32 = (const u32 *)from;
|
||||
/*
 * The tested expression becomes non-zero once any byte of *from32 is zero,
 * so the word that holds the terminator is not copied by this loop.
 */
while ( ((*from32 - 0x01010101) & (~*from32 & 0x80808080)) == 0 )
|
||||
*to32++ = *from32++;
|
||||
|
||||
/* continue byte-wise from where the word loop stopped */
to = (char *)to32;
|
||||
from = (const char *)from32;
|
||||
|
||||
/* copy the remaining bytes (can only be within 1 and 4) */
|
||||
#pragma unroll(4)
|
||||
/* empty loop body: the copy happens inside the loop condition */
while ((*to++ = *from++) != '\0');
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
/*
 * Copy the NUL-terminated string `from` to `to`, moving 16-bit words once
 * `to` has been brought to a 2-byte boundary.
 *
 * The strcpy implementation further down in this file takes this path when
 * `to` and `from` share the same offset modulo 2 (but not modulo 4), so
 * aligning `to` is expected to align `from` as well.
 *
 * NOTE(review): the word loop accesses the char buffers through u16 lvalues
 * and may load one byte that lies after the terminator; confirm the build
 * tolerates this (e.g. -fno-strict-aliasing).
 */
static inline void twobyte_strcpy(char *to, const char *from)
|
||||
{
|
||||
/* at most one byte is needed to reach a 2-byte boundary */
if ((uintptr_t)to % 2) {
|
||||
if ((*to++ = *from++) == '\0')
|
||||
return;
|
||||
}
|
||||
|
||||
u16 *to16 = (u16 *)to;
|
||||
const u16 *from16 = (const u16 *)from;
|
||||
/*
 * The tested expression becomes non-zero once either byte of *from16 is
 * zero, so the word that holds the terminator is not copied by this loop.
 */
while ( ((*from16 - 0x0101) & (~*from16 & 0x8080)) == 0 )
|
||||
*to16++ = *from16++;
|
||||
|
||||
/* continue byte-wise from where the word loop stopped */
to = (char *)to16;
|
||||
from = (const char *)from16;
|
||||
|
||||
/* copy the remaining bytes (can only be within 1 and 2) */
|
||||
#pragma unroll(2)
|
||||
/* empty loop body: the copy happens inside the loop condition */
while ((*to++ = *from++) != '\0');
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
#ifdef WEAK_STRCPY
|
||||
__weak_reference(__strcpy, strcpy);
|
||||
#define STRCPY_NAME __strcpy
|
||||
#else
|
||||
#define STRCPY_NAME strcpy
|
||||
#endif
|
||||
|
||||
char *
|
||||
#ifdef WEAK_STRCPY
|
||||
__strcpy
|
||||
#else
|
||||
strcpy
|
||||
#endif
|
||||
(char *restrict to, const char *restrict from)
|
||||
char *STRCPY_NAME(char *restrict to, const char *restrict from)
|
||||
{
|
||||
char *save = to;
|
||||
|
||||
for (; (*to = *from); ++from, ++to)
|
||||
/* nothing */;
|
||||
/*
|
||||
* Check if the pointers can both be aligned to 2 or 4 bytes and copy
|
||||
* in whole blocks of 2 or 4 bytes respectively if that's the case.
|
||||
* This optimization is utterly useless because i'm almost certainly
|
||||
* gonna write (or "import", rather) dedicated assembly routines but
|
||||
* hey it can't hurt to introduce new sources for bugs right?
|
||||
*/
|
||||
if (((uintptr_t)to % 4) == ((uintptr_t)from % 4)) {
|
||||
fourbyte_strcpy(to, from);
|
||||
} else if (((uintptr_t)to % 2) == ((uintptr_t)from % 2)) {
|
||||
twobyte_strcpy(to, from);
|
||||
} else {
|
||||
while ((*to++ = *from++) != '\0');
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
return save;
|
||||
}
|
||||
|
||||
#ifdef WEAK_STRCPY
|
||||
#include <gay/cdefs.h>
|
||||
__weak __alias(__strcpy) char *strcpy(char *restrict, const char *restrict);
|
||||
#endif /* WEAK_STRCPY */
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>.
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <gay/mm.h>
|
||||
#include <gay/types.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
char *strdup(const char *str)
|
||||
|
@ -10,7 +11,7 @@ char *strdup(const char *str)
|
|||
char *copy;
|
||||
|
||||
len = strlen(str) + 1;
|
||||
if ((copy = kmalloc(len, MM_KERNEL)) == NULL)
|
||||
if ((copy = malloc(len)) == NULL)
|
||||
return NULL;
|
||||
|
||||
memcpy(copy, str, len);
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
*
|
||||
* ((x - 0x01....01) & ~x & 0x80....80)
|
||||
*
|
||||
* would evaluate to a non-zero value iff any of the bytes in the
|
||||
* would evaluate to a non-zero value if any of the bytes in the
|
||||
* original word is zero.
|
||||
*
|
||||
* The algorithm above is found on "Hacker's Delight" by
|
||||
|
@ -34,23 +34,13 @@ static const unsigned long mask80 = 0x8080808080808080;
|
|||
#error "Unsupported word size"
|
||||
#endif
|
||||
|
||||
#define LONGPTR_MASK (sizeof(long) - 1)
|
||||
|
||||
/*
|
||||
* Helper macro to return string length if we caught the zero
|
||||
* byte.
|
||||
*/
|
||||
#define testbyte(x) \
|
||||
do { \
|
||||
if (p[x] == '\0') \
|
||||
return (p - str + x); \
|
||||
} while (0)
|
||||
#define LONGPTR_MASK (sizeof(unsigned long) - 1)
|
||||
|
||||
usize strlen(const char *str)
|
||||
{
|
||||
const char *p;
|
||||
const unsigned long *lp;
|
||||
long va, vb;
|
||||
unsigned long va, vb;
|
||||
|
||||
/*
|
||||
* Before trying the hard (unaligned byte-by-byte access) way
|
||||
|
@ -63,38 +53,41 @@ usize strlen(const char *str)
|
|||
* boundaries is integral multiple of word size.
|
||||
*/
|
||||
lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
|
||||
va = (*lp - mask01); /* NOLINT */
|
||||
vb = ((~*lp) & mask80); /* NOLINT */
|
||||
va = *lp - mask01;
|
||||
vb = (~*lp) & mask80;
|
||||
lp++;
|
||||
if (va & vb) {
|
||||
/* Check if we have \0 in the first part */
|
||||
for (p = str; p < (const char *)lp; p++) {
|
||||
if (*p == '\0')
|
||||
return (p - str);
|
||||
return p - str;
|
||||
}
|
||||
}
|
||||
|
||||
/* Scan the rest of the string using word sized operation */
|
||||
for (; ; lp++) {
|
||||
va = (*lp - mask01); /* NOLINT */
|
||||
vb = ((~*lp) & mask80); /* NOLINT */
|
||||
va = *lp - mask01;
|
||||
vb = (~*lp) & mask80;
|
||||
if (va & vb) {
|
||||
p = (const char *)(lp);
|
||||
testbyte(0);
|
||||
testbyte(1);
|
||||
testbyte(2);
|
||||
testbyte(3);
|
||||
#if (LONG_BIT >= 64)
|
||||
testbyte(4);
|
||||
testbyte(5);
|
||||
testbyte(6);
|
||||
testbyte(7);
|
||||
#endif
|
||||
/*
|
||||
* The original version from FreeBSD uses 4 (or 8) if
|
||||
* branches querying every byte sequentially here.
|
||||
* I was bored and rewrote it branchless, but that
|
||||
* probably doesn't help in terms of performance
|
||||
* because clang's optimizer is better anyway.
|
||||
*/
|
||||
p = (const char *)lp;
|
||||
unsigned long l = *lp;
|
||||
unsigned long add = 1;
|
||||
#pragma clang loop unroll(full)
|
||||
for (int i = 0; i < sizeof(l); i++) {
|
||||
add &= (l & 0xffu) != 0;
|
||||
p += add;
|
||||
l >>= 8;
|
||||
}
|
||||
return p - str;
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTREACHED */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <gay/mm.h>
|
||||
#include <gay/types.h>
|
||||
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
char *strndup(const char *str, usize maxlen)
|
||||
|
@ -12,7 +11,7 @@ char *strndup(const char *str, usize maxlen)
|
|||
usize len;
|
||||
|
||||
len = strnlen(str, maxlen);
|
||||
copy = kmalloc(len + 1, MM_KERNEL);
|
||||
copy = malloc(len + 1);
|
||||
if (copy != NULL) {
|
||||
memcpy(copy, str, len);
|
||||
copy[len] = '\0';
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include <gay/cdefs.h>
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#ifndef _KERNEL
|
||||
#include <string.h>
|
||||
|
||||
char *__strtok_r(char *s, const char *delim, char **last)
|
||||
|
@ -59,7 +59,7 @@ char *strtok(char *s, const char *delim)
|
|||
return __strtok_r(s, delim, &last);
|
||||
}
|
||||
|
||||
#endif /* not __KERNEL__ */
|
||||
#endif /* not _KERNEL */
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
|
|
|
@ -1,18 +1,23 @@
|
|||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <gay/mm.h>
|
||||
#include <gay/arith.h>
|
||||
#include <gay/types.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
|
||||
wchar_t *wcsdup(const wchar_t *s)
|
||||
{
|
||||
wchar_t *copy;
|
||||
size_t len;
|
||||
/* if this would overflow we'd have an entirely different kind of problem */
|
||||
usize len = wcslen(s) + 1;
|
||||
|
||||
len = wcslen(s) + 1;
|
||||
if ((copy = kmalloc(len * sizeof(wchar_t), MM_KERNEL)) == NULL)
|
||||
return (NULL);
|
||||
usize size;
|
||||
if (mul_overflow(&size, len, sizeof(*copy)))
|
||||
return NULL;
|
||||
|
||||
if ((copy = malloc(size)) == NULL)
|
||||
return NULL;
|
||||
return wmemcpy(copy, s, len);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ usize wcslcpy(wchar_t *dst, const wchar_t *src, usize siz)
|
|||
/* Copy as many bytes as will fit */
|
||||
if (n != 0 && --n != 0) {
|
||||
do {
|
||||
if ((*d++ = *s++) == 0)
|
||||
if ((*d++ = *s++) == L'\0')
|
||||
break;
|
||||
} while (--n != 0);
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ usize wcslcpy(wchar_t *dst, const wchar_t *src, usize siz)
|
|||
/* Not enough room in dst, add NUL and traverse rest of src */
|
||||
if (n == 0) {
|
||||
if (siz != 0)
|
||||
*d = '\0'; /* NUL-terminate dst */
|
||||
*d = L'\0'; /* NUL-terminate dst */
|
||||
while (*s++)
|
||||
/* nothing */;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue