lib/x86: add fast memset family of functions
This commit is contained in:
parent
baf03e97a4
commit
bd23d2cbc8
8 changed files with 261 additions and 1 deletions
|
@ -6,5 +6,6 @@ target_compile_definitions(gay_arch PUBLIC ${GAY_KERNEL_DEFINITIONS})
|
|||
# gay_arch links privately against the C library and the kernel core.
target_link_libraries(gay_arch PRIVATE c gay_kernel)

# Each subdirectory contributes its own part of the x86 architecture code.
add_subdirectory(boot)
add_subdirectory(lib)
add_subdirectory(mm)
add_subdirectory(sys)
|
||||
|
|
72
arch/x86/include/arch/string/memset.h
Normal file
72
arch/x86/include/arch/string/memset.h
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gay/types.h>
|
||||
|
||||
#define __HAVE_ARCH_MEMSET
|
||||
extern void *memset(void *dest, int c, usize n);
|
||||
|
||||
#define __HAVE_ARCH_MEMSET16
|
||||
static inline void *memset16(u16 *dest, u16 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
nbyte /= 2;
|
||||
|
||||
__asm__ volatile(
|
||||
" rep \n"
|
||||
" stosw \n"
|
||||
: "+c"(nbyte), "+D"(dest)
|
||||
: "a"(val)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return dst0;
|
||||
}
|
||||
|
||||
#define __HAVE_ARCH_MEMSET32
|
||||
static inline void *memset32(u32 *dest, u32 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
nbyte /= 4;
|
||||
|
||||
__asm__ volatile(
|
||||
" rep \n"
|
||||
" stosl \n"
|
||||
: "+c"(nbyte), "+D"(dest)
|
||||
: "a"(val)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return dst0;
|
||||
}
|
||||
|
||||
#define __HAVE_ARCH_MEMSET64
|
||||
static inline void *memset64(u64 *dest, u64 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
nbyte /= 8;
|
||||
|
||||
#ifdef __x86_64__
|
||||
__asm__ volatile(
|
||||
" rep \n"
|
||||
" stosq \n"
|
||||
: "+c"(nbyte), "+D"(dest)
|
||||
: "a"(val)
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
__asm__ volatile(
|
||||
"1: stosl \n"
|
||||
" xchgl %3, %2 \n"
|
||||
" stosl \n"
|
||||
" xchgl %3, %2 \n"
|
||||
" loop 1b \n"
|
||||
: "+c"(nbyte), "+D"(dest)
|
||||
: "a"((u32)val), "r"((u32)(val >> 32))
|
||||
: "memory"
|
||||
);
|
||||
#endif
|
||||
|
||||
return dst0;
|
||||
}
|
3
arch/x86/lib/CMakeLists.txt
Normal file
3
arch/x86/lib/CMakeLists.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# Descend into the directory named by X86_ARCH; the subdirectories present
# here are amd64 and i386.  (X86_ARCH is set outside of this file.)
add_subdirectory("${X86_ARCH}")
|
5
arch/x86/lib/amd64/CMakeLists.txt
Normal file
5
arch/x86/lib/amd64/CMakeLists.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# amd64 implementations of the library routines that are part of gay_arch.
target_sources(gay_arch PRIVATE memset.S)
|
62
arch/x86/lib/amd64/memset.S
Normal file
62
arch/x86/lib/amd64/memset.S
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#include <asm/common.h>
|
||||
|
||||
/*
 * void *memset(void *dest, int c, usize n)
 *
 * In:  rdi = dest, esi = c, rdx = n
 * Out: rax = dest
 *
 * Buffers shorter than 16 bytes are filled with a plain `rep stosb`.
 * Anything larger is first aligned to 8 bytes with at most one byte, one
 * word and one dword store, then filled with `rep stosq`, and finally the
 * remaining n % 8 bytes are written with `rep stosb`.  The fill pattern in
 * rax is widened step by step (8 -> 16 -> 32 -> 64 bit) along the way.
 */
ENTRY(memset)
	pushq	%rbp
	movq	%rsp, %rbp

	cld

	movq	%rdi, %r11	/* save original pointer to dest */

	/*
	 * memset() only uses the low byte of `c`.  Zero-extend it so that
	 * stray upper bits (e.g. from a sign-extended negative char) cannot
	 * leak into the replicated pattern built below.
	 */
	movzbl	%sil, %eax	/* eax = (unsigned char)c, for rep;stosq */
	movq	%rdx, %rcx	/* move `n' into correct register for rep;stosq */

	cmpq	$16, %rcx
	jb	out		/* n < 16, not worth the effort */

	testl	$1, %edi
	jz	2f
	stosb
	decq	%rcx

	/* 2-byte aligned */
2:	movl	%eax, %edx
	shll	$8, %eax
	orl	%edx, %eax	/* c |= (c << 8) */

	testl	$2, %edi
	jz	4f
	stosw
	subq	$2, %rcx

	/* 4-byte aligned */
4:	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax	/* c |= (c << 16) */

	testl	$4, %edi
	jz	8f
	stosl
	subq	$4, %rcx

	/* 8-byte aligned, now we can fire stosq */
8:	movl	%eax, %edx	/* writing edx zero-extends into rdx */
	shlq	$32, %rax
	orq	%rdx, %rax	/* c |= (c << 32) */
	movl	%ecx, %edx
	andl	$7, %edx	/* edx = n % 8 */
	shrq	$3, %rcx	/* n /= 8 */
	rep
	stosq
	movl	%edx, %ecx	/* rcx = leftover byte count */

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb
	movq	%r11, %rax	/* return original pointer to dest */
	popq	%rbp
	retq
END(memset)
|
5
arch/x86/lib/i386/CMakeLists.txt
Normal file
5
arch/x86/lib/i386/CMakeLists.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# i386 implementations of the library routines that are part of gay_arch.
target_sources(gay_arch PRIVATE memset.S)
|
51
arch/x86/lib/i386/memset.S
Normal file
51
arch/x86/lib/i386/memset.S
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#include <asm/common.h>
|
||||
|
||||
/*
 * void *memset(void *dest, int c, usize n)
 *
 * In:  8(%ebp) = dest, 12(%ebp) = c, 16(%ebp) = n  (arguments on the stack)
 * Out: eax = dest
 *
 * Buffers shorter than 16 bytes are filled with a plain `rep stosb`.
 * Anything larger is first aligned to 4 bytes with at most one byte and one
 * word store, then filled with `rep stosl`, and finally the remaining
 * n % 4 bytes are written with `rep stosb`.  The fill pattern in eax is
 * widened step by step (8 -> 16 -> 32 bit) along the way.
 */
ENTRY(memset)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi		/* edi is callee-saved, must be preserved */

	cld

	movl	8(%ebp), %edi	/* edi = dest */
	/*
	 * memset() only uses the low byte of `c`.  Zero-extend it so that
	 * stray upper bits (e.g. from a sign-extended negative char) cannot
	 * leak into the replicated pattern built below.
	 */
	movzbl	12(%ebp), %eax	/* eax = (unsigned char)c */
	movl	16(%ebp), %ecx	/* ecx = n */

	cmpl	$16, %ecx
	jb	out		/* n < 16, not worth the effort */

	testl	$1, %edi
	jz	2f
	stosb
	decl	%ecx

	/* 2-byte aligned */
2:	movl	%eax, %edx
	shll	$8, %eax
	orl	%edx, %eax	/* c |= (c << 8) */

	testl	$2, %edi
	jz	4f
	stosw
	subl	$2, %ecx	/* n -= 2 */

	/* 4-byte aligned, now we can fire stosl */
4:	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax	/* c |= (c << 16) */
	movl	%ecx, %edx
	/*
	 * The bulk store below is 4 bytes wide, so the leftover is n % 4.
	 * Masking with 7 here would write up to 4 bytes past the buffer.
	 */
	andl	$3, %edx	/* edx = n % 4 */
	shrl	$2, %ecx	/* n /= 4 */
	rep
	stosl
	movl	%edx, %ecx	/* ecx = leftover byte count */

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb
	movl	8(%ebp), %eax	/* return original pointer to dest */
	popl	%edi
	popl	%ebp
	ret
END(memset)
|
|
@ -7,6 +7,8 @@
|
|||
#define wsize sizeof(unsigned long)
|
||||
#define wmask (wsize - 1)
|
||||
|
||||
/* XXX get this mess sorted out */
|
||||
|
||||
#ifdef BZERO
|
||||
#include <strings.h>
|
||||
|
||||
|
@ -15,15 +17,19 @@
|
|||
#define WIDEVAL 0
|
||||
|
||||
void bzero(void *dst0, usize length)
|
||||
#else
|
||||
#else /* not BZERO */
|
||||
#include <string.h>
|
||||
|
||||
#define RETURN return (dst0)
|
||||
#define VAL c0
|
||||
#define WIDEVAL c
|
||||
|
||||
#ifdef __HAVE_ARCH_MEMSET
|
||||
void *__memset(void *dst0, int c0, usize length)
|
||||
#else
|
||||
void *memset(void *dst0, int c0, usize length)
|
||||
#endif
|
||||
#endif /* not BZERO */
|
||||
{
|
||||
usize t;
|
||||
#ifndef BZERO
|
||||
|
@ -93,6 +99,61 @@ void *memset(void *dst0, int c0, usize length)
|
|||
RETURN;
|
||||
}
|
||||
|
||||
#ifndef BZERO
|
||||
#ifndef __HAVE_ARCH_MEMSET16
|
||||
void *memset16(u16 *dest, u16 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
|
||||
if (nbyte >= 2) {
|
||||
if ((uintptr_t)dest % 4)
|
||||
*dest++ = val;
|
||||
nbyte -= 2;
|
||||
if (nbyte)
|
||||
memset32((u32 *)dest, val | ((u32)val << 16), nbyte);
|
||||
}
|
||||
|
||||
return dst0;
|
||||
}
|
||||
#endif /* !__HAVE_ARCH_MEMSET16 */
|
||||
|
||||
#ifndef __HAVE_ARCH_MEMSET32
|
||||
void memset32(u32 *dest, u32 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
|
||||
if (nbyte >= 4) {
|
||||
#if LONG_BIT >= 64
|
||||
if ((uintptr_t)dest % 8)
|
||||
*dest++ = val;
|
||||
nbyte -= 4;
|
||||
if (nbyte >= 8)
|
||||
return memset64((u64 *)dest, val | ((u64)val << 32), nbyte);
|
||||
#else
|
||||
usize n = nbyte / 4;
|
||||
while (n--)
|
||||
*dest++ = val;
|
||||
#endif
|
||||
}
|
||||
|
||||
return dst0;
|
||||
}
|
||||
#endif /* !__HAVE_ARCH_MEMSET32 */
|
||||
|
||||
#ifndef __HAVE_ARCH_MEMSET64
|
||||
void memset64(u64 *dest, u64 val, usize nbyte)
|
||||
{
|
||||
void *dst0 = dest;
|
||||
|
||||
usize n = nbyte / 8;
|
||||
while (n--)
|
||||
*dest++ = val;
|
||||
|
||||
return dst0;
|
||||
}
|
||||
#endif /* !__HAVE_ARCH_MEMSET64 */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>
|
||||
* Copyright (c) 1990, 1993
|
||||
|
|
Loading…
Reference in a new issue