lib/x86: add fast memset family of functions
parent
baf03e97a4
commit
bd23d2cbc8
@ -0,0 +1,72 @@
|
||||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gay/types.h>
|
||||
|
||||
#define __HAVE_ARCH_MEMSET
|
||||
/*
 * Architecture-specific memset; the implementation lives in the per-arch
 * memset.S assembly file rather than in C.
 * Fills `n` bytes starting at `dest` with the byte `c` and returns `dest`.
 */
extern void *memset(void *dest, int c, usize n);
|
||||
|
||||
#define __HAVE_ARCH_MEMSET16
|
||||
/**
 * Fill a memory region with a repeated 16-bit value using `rep stosw`.
 *
 * The direction flag is not touched here, so it must already be clear
 * for the store to walk upwards from `dest`.
 *
 * @param dest  Start of the region to fill
 * @param val   16-bit pattern stored into every word
 * @param nbyte Size of the region in BYTES; it is divided by 2 (rounding
 *              down) to get the number of words actually written
 * @returns The original value of `dest`
 */
static inline void *memset16(u16 *dest, u16 val, usize nbyte)
{
	void *const start = dest;
	usize words = nbyte / 2;

	/* the count goes in via "c", the cursor via "D", the pattern via "a" */
	__asm__ volatile(
		"rep stosw"
		: "+c"(words), "+D"(dest)
		: "a"(val)
		: "memory"
	);

	return start;
}
|
||||
|
||||
#define __HAVE_ARCH_MEMSET32
|
||||
/**
 * Fill a memory region with a repeated 32-bit value using `rep stosl`.
 *
 * The direction flag is not touched here, so it must already be clear
 * for the store to walk upwards from `dest`.
 *
 * @param dest  Start of the region to fill
 * @param val   32-bit pattern stored into every dword
 * @param nbyte Size of the region in BYTES; it is divided by 4 (rounding
 *              down) to get the number of dwords actually written
 * @returns The original value of `dest`
 */
static inline void *memset32(u32 *dest, u32 val, usize nbyte)
{
	void *const start = dest;
	usize dwords = nbyte / 4;

	/* the count goes in via "c", the cursor via "D", the pattern via "a" */
	__asm__ volatile(
		"rep stosl"
		: "+c"(dwords), "+D"(dest)
		: "a"(val)
		: "memory"
	);

	return start;
}
|
||||
|
||||
#define __HAVE_ARCH_MEMSET64
|
||||
/**
 * Fill a memory region with a repeated 64-bit value.
 *
 * On x86_64 this is a single `rep stosq`.  On 32-bit x86 there is no 64-bit
 * string store, so each element is written as two `stosl` (low half first,
 * then high half) inside a `loop`.
 *
 * The direction flag is not touched here, so it must already be clear
 * for the stores to walk upwards from `dest`.
 *
 * @param dest  Start of the region to fill
 * @param val   64-bit pattern stored into every qword
 * @param nbyte Size of the region in BYTES; it is divided by 8 (rounding
 *              down) to get the number of qwords actually written
 * @returns The original value of `dest`
 */
static inline void *memset64(u64 *dest, u64 val, usize nbyte)
{
	void *dst0 = dest;
	nbyte /= 8;

#ifdef __x86_64__
	__asm__ volatile(
"	rep			\n"
"	stosq			\n"
	: "+c"(nbyte), "+D"(dest)
	: "a"(val)
	: "memory"
	);
#else
	u32 lo = (u32)val;
	u32 hi = (u32)(val >> 32);

	/*
	 * `loop` decrements the counter BEFORE testing it, and the loop body
	 * runs once before that test.  Entering with a count of 0 would
	 * therefore store one element and then wrap around to 2^32 - 1 further
	 * iterations, so an empty fill has to be skipped up front.
	 */
	if (nbyte != 0) {
		/*
		 * xchgl writes to both of its operands, so the two halves are
		 * declared as read-write even though they hold their original
		 * values again once the loop finishes.
		 */
		__asm__ volatile(
"1:	stosl			\n"
"	xchgl	%3, %2		\n"
"	stosl			\n"
"	xchgl	%3, %2		\n"
"	loop	1b		\n"
		: "+c"(nbyte), "+D"(dest), "+a"(lo), "+r"(hi)
		:
		: "memory"
		);
	}
#endif

	return dst0;
}
|
@ -0,0 +1,3 @@
|
||||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# Descend into the subdirectory named by X86_ARCH, which holds the
# architecture-specific sources.
add_subdirectory("${X86_ARCH}")
|
@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# Build the assembly memset implementation as part of the gay_arch target.
target_sources(gay_arch PRIVATE memset.S)
|
@ -0,0 +1,62 @@
|
||||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#include <asm/common.h>
|
||||
|
||||
/*
 * void *memset(void *dest, int c, usize n)
 *
 * Fill `n' bytes starting at `dest' with the byte value `c'.
 *
 * In:   %rdi = dest, %esi = c, %rdx = n
 * Out:  %rax = dest (the original pointer)
 * Uses: %rcx, %rdx, %r11, %rdi
 *
 * Only the low 8 bits of `c' are used, as if converted to unsigned char.
 * Fills shorter than 16 bytes are done with a plain rep;stosb.  Longer ones
 * write 1, 2 and 4 bytes as needed to bring the destination up to 8-byte
 * alignment, replicating the fill byte across the register along the way,
 * then store the bulk with rep;stosq and the tail with rep;stosb.
 */
ENTRY(memset)
	pushq	%rbp
	movq	%rsp,		%rbp

	cld

	movq	%rdi,		%r11	/* save original pointer to dest */

	/*
	 * Truncate `c' to a byte.  Any bits above the low 8 would be ORed
	 * into the replicated pattern by the shift/or steps below.
	 */
	movzbl	%sil,		%eax	/* eax = (unsigned char)c */
	movq	%rdx,		%rcx	/* move `n' into correct register for rep;stosq */

	cmpq	$16,		%rcx
	jb	out			/* n < 16, not worth the effort */

	testl	$1,		%edi
	jz	2f
	stosb
	decq	%rcx

	/* 2-byte aligned */
2:	movl	%eax,		%edx
	shll	$8,		%eax
	orl	%edx,		%eax	/* c |= (c << 8) */

	testl	$2,		%edi
	jz	4f
	stosw
	subq	$2,		%rcx

	/* 4-byte aligned */
4:	movl	%eax,		%edx
	shll	$16,		%eax
	orl	%edx,		%eax	/* c |= (c << 16) */

	testl	$4,		%edi
	jz	8f
	stosl
	subq	$4,		%rcx

	/* 8-byte aligned, now we can fire stosq */
8:	movl	%eax,		%edx	/* also zeroes the upper half of rdx */
	shlq	$32,		%rax
	orq	%rdx,		%rax	/* c |= (c << 32) */
	movl	%ecx,		%edx
	andl	$7,		%edx	/* edx = n % 8 */
	shrq	$3,		%rcx	/* n /= 8 */
	rep
	stosq
	movl	%edx,		%ecx	/* rcx = leftover byte count */

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb
	movq	%r11,		%rax	/* return original pointer to dest */
	popq	%rbp
	retq
END(memset)
|
@ -0,0 +1,5 @@
|
||||
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
|
||||
|
||||
# Build the assembly memset implementation as part of the gay_arch target.
target_sources(gay_arch PRIVATE memset.S)
|
@ -0,0 +1,51 @@
|
||||
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
||||
|
||||
#include <asm/common.h>
|
||||
|
||||
/*
 * void *memset(void *dest, int c, usize n)
 *
 * Fill `n' bytes starting at `dest' with the byte value `c'.
 *
 * In:   8(%ebp) = dest, 12(%ebp) = c, 16(%ebp) = n  (arguments on the stack)
 * Out:  %eax = dest (the original pointer)
 * Uses: %ecx, %edx; %edi is saved and restored
 *
 * Only the low 8 bits of `c' are used, as if converted to unsigned char.
 * Fills shorter than 16 bytes are done with a plain rep;stosb.  Longer ones
 * write 1 and 2 bytes as needed to bring the destination up to 4-byte
 * alignment, replicating the fill byte across the register along the way,
 * then store the bulk with rep;stosl and the tail with rep;stosb.
 */
ENTRY(memset)
	pushl	%ebp
	movl	%esp,		%ebp
	pushl	%edi			/* callee-saved, must survive the call */

	cld

	movl	8(%ebp),	%edi	/* edi = dest */
	/*
	 * Load only the low byte of `c'.  Any bits above the low 8 would be
	 * ORed into the replicated pattern by the shift/or steps below.
	 */
	movzbl	12(%ebp),	%eax	/* eax = (unsigned char)c */
	movl	16(%ebp),	%ecx	/* ecx = n */

	cmpl	$16,		%ecx
	jb	out			/* n < 16, not worth the effort */

	testl	$1,		%edi
	jz	2f
	stosb
	decl	%ecx

	/* 2-byte aligned */
2:	movl	%eax,		%edx
	shll	$8,		%eax
	orl	%edx,		%eax	/* c |= (c << 8) */

	testl	$2,		%edi
	jz	4f
	stosw
	subl	$2,		%ecx	/* n -= 2 */

	/* 4-byte aligned, now we can fire stosl */
4:	movl	%eax,		%edx
	shll	$16,		%eax
	orl	%edx,		%eax	/* c |= (c << 16) */
	movl	%ecx,		%edx
	/*
	 * The bulk store below works in units of 4 bytes, so the leftover is
	 * n % 4.  Masking with 7 here would write up to 4 bytes past the end.
	 */
	andl	$3,		%edx	/* edx = n % 4 */
	shrl	$2,		%ecx	/* n /= 4 */
	rep
	stosl
	movl	%edx,		%ecx	/* ecx = leftover byte count */

	/* write out remaining bytes (or do the whole memset, if n < 16) */
out:	rep
	stosb
	movl	8(%ebp),	%eax	/* return original pointer to dest */
	popl	%edi
	popl	%ebp
	ret
END(memset)
|
Loading…
Reference in New Issue