From bd23d2cbc8bab7c6e619934930aed5ace70ebcd4 Mon Sep 17 00:00:00 2001
From: fef
Date: Wed, 22 Dec 2021 22:24:55 +0100
Subject: [PATCH] lib/x86: add fast memset family of functions

---
 arch/x86/CMakeLists.txt               |   1 +
 arch/x86/include/arch/string/memset.h | 102 ++++++++++++++++++++++++++
 arch/x86/lib/CMakeLists.txt           |   3 +
 arch/x86/lib/amd64/CMakeLists.txt     |   5 ++
 arch/x86/lib/amd64/memset.S           |  72 ++++++++++++++++++
 arch/x86/lib/i386/CMakeLists.txt      |   5 ++
 arch/x86/lib/i386/memset.S            |  62 ++++++++++++++++
 lib/c/string/memset.c                 |  87 +++++++++++++++++++++-
 8 files changed, 336 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/arch/string/memset.h
 create mode 100644 arch/x86/lib/CMakeLists.txt
 create mode 100644 arch/x86/lib/amd64/CMakeLists.txt
 create mode 100644 arch/x86/lib/amd64/memset.S
 create mode 100644 arch/x86/lib/i386/CMakeLists.txt
 create mode 100644 arch/x86/lib/i386/memset.S

diff --git a/arch/x86/CMakeLists.txt b/arch/x86/CMakeLists.txt
index 7c8b9c0..a16f330 100644
--- a/arch/x86/CMakeLists.txt
+++ b/arch/x86/CMakeLists.txt
@@ -6,5 +6,6 @@ target_compile_definitions(gay_arch PUBLIC ${GAY_KERNEL_DEFINITIONS})
 target_link_libraries(gay_arch PRIVATE c gay_kernel)
 
 add_subdirectory(boot)
+add_subdirectory(lib)
 add_subdirectory(mm)
 add_subdirectory(sys)
diff --git a/arch/x86/include/arch/string/memset.h b/arch/x86/include/arch/string/memset.h
new file mode 100644
index 0000000..a3d05bb
--- /dev/null
+++ b/arch/x86/include/arch/string/memset.h
@@ -0,0 +1,102 @@
+/* Copyright (C) 2021 fef . All rights reserved. */
+
+#pragma once
+
+#include
+
+/*
+ * The inline routines in this file are built on the x86 string store
+ * instructions.  None of them executes `cld', so they rely on the
+ * direction flag already being clear when they run.
+ */
+
+#define __HAVE_ARCH_MEMSET
+/* implemented in assembly, see arch/x86/lib/{amd64,i386}/memset.S */
+extern void *memset(void *dest, int c, usize n);
+
+#define __HAVE_ARCH_MEMSET16
+/*
+ * Fill the buffer at `dest' with copies of the 16-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 2.  Returns the original value of `dest'.
+ */
+static inline void *memset16(u16 *dest, u16 val, usize nbyte)
+{
+	void *dst0 = dest;
+	nbyte /= 2;	/* rep takes an item count, not a byte count */
+
+	__asm__ volatile(
+"	rep			\n"
+"	stosw			\n"
+	: "+c"(nbyte), "+D"(dest)
+	: "a"(val)
+	: "memory"
+	);
+
+	return dst0;
+}
+
+#define __HAVE_ARCH_MEMSET32
+/*
+ * Fill the buffer at `dest' with copies of the 32-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 4.  Returns the original value of `dest'.
+ */
+static inline void *memset32(u32 *dest, u32 val, usize nbyte)
+{
+	void *dst0 = dest;
+	nbyte /= 4;	/* rep takes an item count, not a byte count */
+
+	__asm__ volatile(
+"	rep			\n"
+"	stosl			\n"
+	: "+c"(nbyte), "+D"(dest)
+	: "a"(val)
+	: "memory"
+	);
+
+	return dst0;
+}
+
+#define __HAVE_ARCH_MEMSET64
+/*
+ * Fill the buffer at `dest' with copies of the 64-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 8.  Returns the original value of `dest'.
+ */
+static inline void *memset64(u64 *dest, u64 val, usize nbyte)
+{
+	void *dst0 = dest;
+	nbyte /= 8;	/* number of 64-bit items, not bytes */
+
+#ifdef __x86_64__
+	__asm__ volatile(
+"	rep			\n"
+"	stosq			\n"
+	: "+c"(nbyte), "+D"(dest)
+	: "a"(val)
+	: "memory"
+	);
+#else
+	/*
+	 * There is no 64-bit store here, so each item is written as two
+	 * 32-bit halves (low half first) that take turns in %eax.
+	 * `loop' decrements %ecx before testing it, which is why an item
+	 * count of zero has to bail out before the first iteration.
+	 */
+	__asm__ volatile(
+"	jecxz	2f		\n"
+"1:	stosl			\n"
+"	xchgl	%3, %2		\n"
+"	stosl			\n"
+"	xchgl	%3, %2		\n"
+"	loop	1b		\n"
+"2:				\n"
+	: "+c"(nbyte), "+D"(dest)
+	: "a"((u32)val), "r"((u32)(val >> 32))
+	: "memory"
+	);
+#endif
+
+	return dst0;
+}
diff --git a/arch/x86/lib/CMakeLists.txt b/arch/x86/lib/CMakeLists.txt
new file mode 100644
index 0000000..388a8a1
--- /dev/null
+++ b/arch/x86/lib/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Copyright (C) 2021 fef . All rights reserved.
+
+add_subdirectory("${X86_ARCH}")
diff --git a/arch/x86/lib/amd64/CMakeLists.txt b/arch/x86/lib/amd64/CMakeLists.txt
new file mode 100644
index 0000000..763d585
--- /dev/null
+++ b/arch/x86/lib/amd64/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright (C) 2021 fef . All rights reserved.
+
+target_sources(gay_arch PRIVATE
+	memset.S
+)
diff --git a/arch/x86/lib/amd64/memset.S b/arch/x86/lib/amd64/memset.S
new file mode 100644
index 0000000..f5087a6
--- /dev/null
+++ b/arch/x86/lib/amd64/memset.S
@@ -0,0 +1,72 @@
+/* Copyright (C) 2021 fef . All rights reserved. */
+
+#include
+
+/*
+ * void *memset(void *dest, int c, usize n)
+ *
+ * Fill `n' bytes starting at `dest' with the low byte of `c' and return
+ * `dest'.  Buffers of 16 bytes or more are aligned to an 8-byte boundary
+ * and then filled with `rep stosq'; shorter ones and whatever is left
+ * over at the end are written with `rep stosb'.
+ *
+ * in:  %rdi = dest, %esi = c, %rdx = n
+ * out: %rax = dest
+ */
+ENTRY(memset)
+	pushq	%rbp
+	movq	%rsp, %rbp
+
+	cld
+
+	movq	%rdi, %r11	/* save original pointer to dest */
+
+	movzbl	%sil, %eax	/* eax = (unsigned char)c; stray upper bits would corrupt the widening */
+	movq	%rdx, %rcx	/* move `n' into correct register for rep;stosq */
+
+	cmpq	$16, %rcx
+	jb	out		/* n < 16, not worth the effort */
+
+	testl	$1, %edi
+	jz	2f
+	stosb
+	decq	%rcx
+
+	/* 2-byte aligned */
+2:	movl	%eax, %edx
+	shll	$8, %eax
+	orl	%edx, %eax	/* c |= (c << 8) */
+
+	testl	$2, %edi
+	jz	4f
+	stosw
+	subq	$2, %rcx
+
+	/* 4-byte aligned */
+4:	movl	%eax, %edx
+	shll	$16, %eax
+	orl	%edx, %eax	/* c |= (c << 16) */
+
+	testl	$4, %edi
+	jz	8f
+	stosl
+	subq	$4, %rcx
+
+	/* 8-byte aligned, now we can fire stosq */
+8:	movl	%eax, %edx
+	shlq	$32, %rax
+	orq	%rdx, %rax	/* c |= (c << 32) */
+	movl	%ecx, %edx
+	andl	$7, %edx	/* edx = n % 8 */
+	shrq	$3, %rcx	/* n /= 8 */
+	rep
+	stosq
+	movl	%edx, %ecx
+
+	/* write out remaining bytes (or do the whole memset, if n < 16) */
+out:	rep
+	stosb
+	movq	%r11, %rax	/* return original pointer to dest */
+	popq	%rbp
+	retq
+END(memset)
diff --git a/arch/x86/lib/i386/CMakeLists.txt b/arch/x86/lib/i386/CMakeLists.txt
new file mode 100644
index 0000000..763d585
--- /dev/null
+++ b/arch/x86/lib/i386/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright (C) 2021 fef . All rights reserved.
+
+target_sources(gay_arch PRIVATE
+	memset.S
+)
diff --git a/arch/x86/lib/i386/memset.S b/arch/x86/lib/i386/memset.S
new file mode 100644
index 0000000..2bee148
--- /dev/null
+++ b/arch/x86/lib/i386/memset.S
@@ -0,0 +1,62 @@
+/* Copyright (C) 2021 fef . All rights reserved. */
+
+#include
+
+/*
+ * void *memset(void *dest, int c, usize n)
+ *
+ * Fill `n' bytes starting at `dest' with the low byte of `c' and return
+ * `dest'.  Buffers of 16 bytes or more are aligned to a 4-byte boundary
+ * and then filled with `rep stosl'; shorter ones and whatever is left
+ * over at the end are written with `rep stosb'.
+ *
+ * All three arguments are read from the stack, the result is in %eax.
+ */
+ENTRY(memset)
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%edi		/* callee-saved, but stos needs it for dest */
+
+	cld
+
+	movl	8(%ebp), %edi	/* edi = dest */
+	movzbl	12(%ebp), %eax	/* eax = (unsigned char)c */
+	movl	16(%ebp), %ecx	/* ecx = n */
+
+	cmpl	$16, %ecx
+	jb	out		/* n < 16, not worth the effort */
+
+	testl	$1, %edi
+	jz	2f
+	stosb
+	decl	%ecx
+
+	/* 2-byte aligned */
+2:	movl	%eax, %edx
+	shll	$8, %eax
+	orl	%edx, %eax	/* c |= (c << 8) */
+
+	testl	$2, %edi
+	jz	4f
+	stosw
+	subl	$2, %ecx	/* n -= 2 */
+
+	/* 4-byte aligned, now we can fire stosl */
+4:	movl	%eax, %edx
+	shll	$16, %eax
+	orl	%edx, %eax	/* c |= (c << 16) */
+	movl	%ecx, %edx
+	andl	$3, %edx	/* edx = n % 4 */
+	shrl	$2, %ecx	/* n /= 4 */
+	rep
+	stosl
+	movl	%edx, %ecx
+
+	/* write out remaining bytes (or do the whole memset, if n < 16) */
+out:	rep
+	stosb
+	movl	8(%ebp), %eax	/* return original pointer to dest */
+	popl	%edi
+	popl	%ebp
+	ret
+END(memset)
diff --git a/lib/c/string/memset.c b/lib/c/string/memset.c
index 0ae1406..fd5a597 100644
--- a/lib/c/string/memset.c
+++ b/lib/c/string/memset.c
@@ -7,6 +7,8 @@
 #define wsize sizeof(unsigned long)
 #define wmask (wsize - 1)
 
+/* XXX get this mess sorted out */
+
 #ifdef BZERO
 #include
 
@@ -15,15 +17,19 @@
 #define WIDEVAL 0
 
 void bzero(void *dst0, usize length)
-#else
+#else /* not BZERO */
 #include
 
 #define RETURN return (dst0)
 #define VAL c0
 #define WIDEVAL c
 
+#ifdef __HAVE_ARCH_MEMSET
+void *__memset(void *dst0, int c0, usize length)
+#else
 void *memset(void *dst0, int c0, usize length)
 #endif
+#endif /* not BZERO */
 {
 	usize t;
 #ifndef BZERO
@@ -93,6 +99,85 @@ void *memset(void *dst0, int c0, usize length)
 	RETURN;
 }
 
+#ifndef BZERO
+#ifndef __HAVE_ARCH_MEMSET16
+/*
+ * Fill the buffer at `dest' with copies of the 16-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 2.  Returns the original value of `dest'.
+ */
+void *memset16(u16 *dest, u16 val, usize nbyte)
+{
+	void *dst0 = dest;
+	usize n = nbyte / 2;	/* number of whole items to write */
+
+	/* one leading item gets a (2-byte aligned) dest onto a 4-byte boundary */
+	if (n > 0 && (uintptr_t)dest % 4) {
+		*dest++ = val;
+		n--;
+	}
+
+	/* write pairs of items as u32, then the odd one out (if any) */
+	if (n >= 2)
+		memset32((u32 *)dest, val | ((u32)val << 16), (n / 2) * 4);
+	if (n % 2)
+		dest[n - 1] = val;
+
+	return dst0;
+}
+#endif /* !__HAVE_ARCH_MEMSET16 */
+
+#ifndef __HAVE_ARCH_MEMSET32
+/*
+ * Fill the buffer at `dest' with copies of the 32-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 4.  Returns the original value of `dest'.
+ */
+void *memset32(u32 *dest, u32 val, usize nbyte)
+{
+	void *dst0 = dest;
+	usize n = nbyte / 4;	/* number of whole items to write */
+
+#if LONG_BIT >= 64
+	/* one leading item gets a (4-byte aligned) dest onto an 8-byte boundary */
+	if (n > 0 && (uintptr_t)dest % 8) {
+		*dest++ = val;
+		n--;
+	}
+
+	/* write pairs of items as u64, then the odd one out (if any) */
+	if (n >= 2)
+		memset64((u64 *)dest, val | ((u64)val << 32), (n / 2) * 8);
+	if (n % 2)
+		dest[n - 1] = val;
+#else
+	while (n--)
+		*dest++ = val;
+#endif
+
+	return dst0;
+}
+#endif /* !__HAVE_ARCH_MEMSET32 */
+
+#ifndef __HAVE_ARCH_MEMSET64
+/*
+ * Fill the buffer at `dest' with copies of the 64-bit value `val'.
+ * `nbyte' is the size of the buffer in bytes and is rounded down to a
+ * multiple of 8.  Returns the original value of `dest'.
+ */
+void *memset64(u64 *dest, u64 val, usize nbyte)
+{
+	void *dst0 = dest;
+	usize n = nbyte / 8;	/* number of whole items to write */
+
+	while (n--)
+		*dest++ = val;
+
+	return dst0;
+}
+#endif /* !__HAVE_ARCH_MEMSET64 */
+#endif /* !BZERO */
+
 /*
  * Copyright (c) 2021 fef
  * Copyright (c) 1990, 1993