lib/x86: add fast memset family of functions

main
anna 2 years ago
parent baf03e97a4
commit bd23d2cbc8
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -6,5 +6,6 @@ target_compile_definitions(gay_arch PUBLIC ${GAY_KERNEL_DEFINITIONS})
target_link_libraries(gay_arch PRIVATE c gay_kernel)
add_subdirectory(boot)
add_subdirectory(lib)
add_subdirectory(mm)
add_subdirectory(sys)

@ -0,0 +1,72 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#pragma once
#include <gay/types.h>
#define __HAVE_ARCH_MEMSET
extern void *memset(void *dest, int c, usize n);
#define __HAVE_ARCH_MEMSET16
/*
 * Fill a buffer with a repeating 16-bit value using `rep stosw`.
 *
 * dest:  start of the buffer
 * val:   16-bit pattern to store
 * nbyte: buffer size in BYTES; it is divided by 2 to obtain the store
 *        count, so an odd trailing byte is left untouched
 *
 * Returns the original `dest`.
 *
 * NOTE(review): no `cld` is issued here, so this relies on the direction
 * flag already being clear when the function is entered -- confirm.
 */
static inline void *memset16(u16 *dest, u16 val, usize nbyte)
{
	u16 *cursor = dest;
	usize count = nbyte / 2;

	/* stosw stores %ax at (%edi/%rdi) and advances it; rep repeats %ecx/%rcx times */
	__asm__ volatile(
		"rep stosw"
		: "+D"(cursor), "+c"(count)
		: "a"(val)
		: "memory"
	);

	return dest;
}
#define __HAVE_ARCH_MEMSET32
/*
 * Fill a buffer with a repeating 32-bit value using `rep stosl`.
 *
 * dest:  start of the buffer
 * val:   32-bit pattern to store
 * nbyte: buffer size in BYTES; it is divided by 4 to obtain the store
 *        count, so up to 3 trailing bytes are left untouched
 *
 * Returns the original `dest`.
 *
 * NOTE(review): no `cld` is issued here, so this relies on the direction
 * flag already being clear when the function is entered -- confirm.
 */
static inline void *memset32(u32 *dest, u32 val, usize nbyte)
{
	u32 *cursor = dest;
	usize count = nbyte / 4;

	/* stosl stores %eax at (%edi/%rdi) and advances it; rep repeats %ecx/%rcx times */
	__asm__ volatile(
		"rep stosl"
		: "+D"(cursor), "+c"(count)
		: "a"(val)
		: "memory"
	);

	return dest;
}
#define __HAVE_ARCH_MEMSET64
/*
 * Fill a buffer with a repeating 64-bit value.
 *
 * dest:  start of the buffer
 * val:   64-bit pattern to store
 * nbyte: buffer size in BYTES; it is divided by 8 to obtain the store
 *        count, so up to 7 trailing bytes are left untouched
 *
 * Returns the original `dest`.
 *
 * On x86_64 this is a single `rep stosq`.  On 32-bit x86 there is no 64-bit
 * string store, so each element is written as two `stosl` (low half first,
 * then high half) inside a `loop`.
 *
 * NOTE(review): no `cld` is issued here, so this relies on the direction
 * flag already being clear when the function is entered -- confirm.
 */
static inline void *memset64(u64 *dest, u64 val, usize nbyte)
{
	void *dst0 = dest;

	nbyte /= 8;	/* from here on: number of 64-bit elements */

#ifdef __x86_64__
	__asm__ volatile(
		"	rep		\n"
		"	stosq		\n"
		: "+c"(nbyte), "+D"(dest)
		: "a"(val)
		: "memory"
	);
#else
	/*
	 * `loop` decrements %ecx *before* testing it, so entering the loop
	 * with a count of zero would wrap around and store 2^32 elements.
	 * Unlike `rep`, it has no built-in zero check; guard it explicitly.
	 */
	if (nbyte != 0) {
		u32 lo = (u32)val;
		u32 hi = (u32)(val >> 32);

		/*
		 * The two halves are swapped in and out of %eax with xchgl,
		 * i.e. the asm writes to them.  They are therefore declared
		 * as read-write operands rather than plain inputs.
		 */
		__asm__ volatile(
			"1:	stosl		\n"	/* low half */
			"	xchgl %3, %2	\n"
			"	stosl		\n"	/* high half */
			"	xchgl %3, %2	\n"	/* restore low half in %eax */
			"	loop 1b		\n"
			: "+c"(nbyte), "+D"(dest), "+a"(lo), "+r"(hi)
			:
			: "memory"
		);
	}
#endif

	return dst0;
}

@ -0,0 +1,3 @@
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
# Descend into the subdirectory whose name is held in the X86_ARCH variable.
add_subdirectory("${X86_ARCH}")

@ -0,0 +1,5 @@
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
# Add the assembly source memset.S as a private source of the gay_arch target.
target_sources(gay_arch PRIVATE
memset.S
)

@ -0,0 +1,62 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <asm/common.h>
/* void *memset(void *dest, int c, usize n) */
/*
 * void *memset(void *dest, int c, usize n)
 *
 * In:  %rdi = dest, %esi = c, %rdx = n
 * Out: %rax = dest
 *
 * Strategy: for n >= 16, store 1/2/4 bytes as needed to bring the
 * destination to an 8-byte boundary (replicating the fill byte into wider
 * patterns along the way), blast the bulk with `rep stosq`, then finish the
 * remaining n % 8 bytes with `rep stosb`.  Shorter fills go straight to
 * `rep stosb`.
 */
ENTRY(memset)
	pushq	%rbp
	movq	%rsp, %rbp

	cld
	movq	%rdi, %r11	/* save original pointer to dest */
	/*
	 * memset() fills with (unsigned char)c.  The upper bits of `c' must be
	 * discarded *before* the byte is replicated below, otherwise they end
	 * up in the word/dword/qword patterns (e.g. c = -2 or c = 0x100).
	 */
	movzbl	%sil, %eax	/* eax = (unsigned char)c, for stos */
	movq	%rdx, %rcx	/* move `n' into correct register for rep;stosq */
	cmpq	$16, %rcx
	jb	out		/* n < 16, not worth the effort */

	testl	$1, %edi
	jz	2f
	stosb
	decq	%rcx

	/* 2-byte aligned */
2:	movl	%eax, %edx
	shll	$8, %eax
	orl	%edx, %eax	/* c |= (c << 8) */
	testl	$2, %edi
	jz	4f
	stosw
	subq	$2, %rcx

	/* 4-byte aligned */
4:	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax	/* c |= (c << 16) */
	testl	$4, %edi
	jz	8f
	stosl
	subq	$4, %rcx

	/* 8-byte aligned, now we can fire stosq */
8:	movl	%eax, %edx	/* zero-extends into %rdx */
	shlq	$32, %rax
	orq	%rdx, %rax	/* c |= (c << 32) */
	movl	%ecx, %edx
	andl	$7, %edx	/* edx = n % 8 */
	shrq	$3, %rcx	/* n /= 8 */
	rep
	stosq
	movl	%edx, %ecx	/* rcx = leftover byte count (upper half zeroed) */

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb

	movq	%r11, %rax	/* return original pointer to dest */
	popq	%rbp
	retq
END(memset)

@ -0,0 +1,5 @@
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
# Add the assembly source memset.S as a private source of the gay_arch target.
target_sources(gay_arch PRIVATE
memset.S
)

@ -0,0 +1,51 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <asm/common.h>
/* void *memset(void *dest, int c, usize n) */
/*
 * void *memset(void *dest, int c, usize n)
 *
 * In:  8(%ebp) = dest, 12(%ebp) = c, 16(%ebp) = n  (arguments on the stack)
 * Out: %eax = dest
 *
 * Strategy: for n >= 16, store 1/2 bytes as needed to bring the destination
 * to a 4-byte boundary (replicating the fill byte into wider patterns along
 * the way), write the bulk with `rep stosl`, then finish the remaining
 * n % 4 bytes with `rep stosb`.  Shorter fills go straight to `rep stosb`.
 */
ENTRY(memset)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi		/* %edi is callee-saved, but stos needs it */

	cld
	movl	8(%ebp), %edi	/* edi = dest */
	/*
	 * memset() fills with (unsigned char)c.  Load only the low byte so the
	 * upper bits of `c' cannot leak into the replicated patterns below.
	 */
	movzbl	12(%ebp), %eax	/* eax = (unsigned char)c */
	movl	16(%ebp), %ecx	/* ecx = n */
	cmpl	$16, %ecx
	jb	out		/* n < 16, not worth the effort */

	testl	$1, %edi
	jz	2f
	stosb
	decl	%ecx

	/* 2-byte aligned */
2:	movl	%eax, %edx
	shll	$8, %eax
	orl	%edx, %eax	/* c |= (c << 8) */
	testl	$2, %edi
	jz	4f
	stosw
	subl	$2, %ecx	/* n -= 2 */

	/* 4-byte aligned, now we can fire stosl */
4:	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax	/* c |= (c << 16) */
	movl	%ecx, %edx
	/*
	 * The bulk loop stores 4-byte words, so the leftover is n % 4.
	 * Masking with 7 here would write up to 4 bytes past the buffer.
	 */
	andl	$3, %edx	/* edx = n % 4 */
	shrl	$2, %ecx	/* n /= 4 */
	rep
	stosl
	movl	%edx, %ecx

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb

	movl	8(%ebp), %eax	/* return original pointer to dest */
	popl	%edi
	popl	%ebp
	ret
END(memset)

@ -7,6 +7,8 @@
#define wsize sizeof(unsigned long)
#define wmask (wsize - 1)
/* XXX get this mess sorted out */
#ifdef BZERO
#include <strings.h>
@ -15,15 +17,19 @@
#define WIDEVAL 0
void bzero(void *dst0, usize length)
#else
#else /* not BZERO */
#include <string.h>
#define RETURN return (dst0)
#define VAL c0
#define WIDEVAL c
#ifdef __HAVE_ARCH_MEMSET
void *__memset(void *dst0, int c0, usize length)
#else
void *memset(void *dst0, int c0, usize length)
#endif
#endif /* not BZERO */
{
usize t;
#ifndef BZERO
@ -93,6 +99,61 @@ void *memset(void *dst0, int c0, usize length)
RETURN;
}
#ifndef BZERO
#ifndef __HAVE_ARCH_MEMSET16
/*
 * Portable fallback: fill a buffer with a repeating 16-bit value.
 *
 * dest:  start of the buffer
 * val:   16-bit pattern to store
 * nbyte: buffer size in BYTES; nbyte / 2 elements are written, an odd
 *        trailing byte is left untouched
 *
 * Returns the original `dest`.
 *
 * At most one leading element is stored directly to reach a 4-byte
 * boundary, complete pairs are handed to memset32() as a doubled-up
 * pattern, and a single leftover element (if any) is stored at the end.
 *
 * NOTE(review): the 4-byte alignment step assumes `dest` is itself at
 * least 2-byte aligned -- confirm against callers.
 */
void *memset16(u16 *dest, u16 val, usize nbyte)
{
	u16 *pos = dest;
	usize n = nbyte / 2;	/* elements still to write */

	/* head: one element, only if the buffer is not yet 4-byte aligned */
	if (n > 0 && (uintptr_t)pos % 4) {
		*pos++ = val;
		n--;
	}

	/* body: every complete pair of elements goes out as one 32-bit store */
	if (n >= 2) {
		usize pairs = n / 2;

		memset32((u32 *)pos, val | ((u32)val << 16), pairs * 4);
		pos += pairs * 2;
		n -= pairs * 2;
	}

	/* tail: memset32() only writes whole words, so finish the odd element */
	if (n > 0) {
		*pos = val;
	}

	return dest;
}
#endif /* !__HAVE_ARCH_MEMSET16 */
#ifndef __HAVE_ARCH_MEMSET32
/*
 * Portable fallback: fill a buffer with a repeating 32-bit value.
 *
 * dest:  start of the buffer
 * val:   32-bit pattern to store
 * nbyte: buffer size in BYTES; nbyte / 4 elements are written, up to 3
 *        trailing bytes are left untouched
 *
 * Returns the original `dest` (the return type is `void *`, matching the
 * value this function has always tried to return).
 *
 * When LONG_BIT >= 64, at most one leading element is stored directly to
 * reach an 8-byte boundary and complete pairs are handed to memset64() as
 * a doubled-up pattern.  Whatever is left is written one element at a time.
 *
 * NOTE(review): the 8-byte alignment step assumes `dest` is itself at
 * least 4-byte aligned -- confirm against callers.
 */
void *memset32(u32 *dest, u32 val, usize nbyte)
{
	u32 *pos = dest;
	usize n = nbyte / 4;	/* elements still to write */

#if LONG_BIT >= 64
	/* head: one element, only if the buffer is not yet 8-byte aligned */
	if (n > 0 && (uintptr_t)pos % 8) {
		*pos++ = val;
		n--;
	}

	/* body: every complete pair of elements goes out as one 64-bit store */
	if (n >= 2) {
		usize pairs = n / 2;

		memset64((u64 *)pos, val | ((u64)val << 32), pairs * 8);
		pos += pairs * 2;
		n -= pairs * 2;
	}
#endif

	/* tail (or the whole job when long is narrower than 64 bits) */
	while (n > 0) {
		*pos++ = val;
		n--;
	}

	return dest;
}
#endif /* !__HAVE_ARCH_MEMSET32 */
#ifndef __HAVE_ARCH_MEMSET64
/*
 * Portable fallback: fill a buffer with a repeating 64-bit value.
 *
 * dest:  start of the buffer
 * val:   64-bit pattern to store
 * nbyte: buffer size in BYTES; nbyte / 8 elements are written, up to 7
 *        trailing bytes are left untouched
 *
 * Returns the original `dest`.  The function used to be declared `void`
 * while still returning a value; the return type is now `void *`.
 */
void *memset64(u64 *dest, u64 val, usize nbyte)
{
	u64 *pos = dest;
	usize n;

	for (n = nbyte / 8; n > 0; n--) {
		*pos++ = val;
	}

	return dest;
}
#endif /* !__HAVE_ARCH_MEMSET64 */
#endif
/*
* Copyright (c) 2021 fef <owo@fef.moe>
* Copyright (c) 1990, 1993

Loading…
Cancel
Save