From b4ed811920698530836eb9ab66104b9cd33fa622 Mon Sep 17 00:00:00 2001 From: fef Date: Sun, 2 Jan 2022 05:44:46 +0100 Subject: [PATCH] mm: refactor page allocator This is hopefully the last time in a while that something in the mm subsystem needs a refactor this large. There are two main changes: - The page frame allocator returns a vm_page_t rather than a virtual address. - Data for the slab allocator is now stored in struct vm_page, which means there is no overhead in the slab itself so the space is used in a more efficient manner. --- arch/x86/boot/setup32.S | 2 +- arch/x86/boot/setup64.S | 6 +- arch/x86/boot/util.S | 4 - arch/x86/include/arch/page.h | 4 +- arch/x86/include/arch/string.h | 5 + arch/x86/mm/amd64/init.c | 55 +++++--- arch/x86/mm/amd64/page.c | 22 ++- arch/x86/mm/i386/page.c | 18 +-- doc/amd64/memory.md | 14 +- include/gay/cdefs.h | 3 + include/gay/mm.h | 98 ++++++------- include/gay/vm/page.h | 68 ++++----- kernel/mm/boot.c | 3 +- kernel/mm/page.c | 248 +++++++++++++++------------------ kernel/mm/slab.c | 140 ++++++++----------- lib/c/include/string.h | 25 ++++ 16 files changed, 370 insertions(+), 345 deletions(-) create mode 100644 arch/x86/include/arch/string.h diff --git a/arch/x86/boot/setup32.S b/arch/x86/boot/setup32.S index 8f6239d..96ed3f4 100644 --- a/arch/x86/boot/setup32.S +++ b/arch/x86/boot/setup32.S @@ -116,7 +116,7 @@ ENTRY(_setup) * because the page directory is being interpreted as a page table. * This allows us to manipulate the table while we are in virtual memory. */ - movl $(PADDR(pd0) + 0x003), PADDR(pd0) + 1023 * 4 /* 0xffc00000 */ + movl $(PADDR(pd0) + 0x013), PADDR(pd0) + 1023 * 4 /* 0xffc00000 */ /* set the Page Size Extensions (4) and Page Global Enable (7) bits in cr4 */ mov %cr4, %ecx diff --git a/arch/x86/boot/setup64.S b/arch/x86/boot/setup64.S index 35881f8..2243f2f 100644 --- a/arch/x86/boot/setup64.S +++ b/arch/x86/boot/setup64.S @@ -160,11 +160,11 @@ ENTRY(_setup) movl $0x00000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE)) movl $0x40000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE + 0x40000000)) - movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t) /* present (0), write (1), huge (7) */ + movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t) /* present (0), write (1) */ movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4T_OFFSET(KERNBASE)) - /* map the PML4 to itself */ - movl $PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET)) + /* map the PML4 to itself (set the cache disable bit (4)) */ + movl $PADDR(_pml4t + 0x013), PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET)) movb $0x80, PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */ /* diff --git a/arch/x86/boot/util.S b/arch/x86/boot/util.S index 144a976..2eb894e 100644 --- a/arch/x86/boot/util.S +++ b/arch/x86/boot/util.S @@ -14,10 +14,6 @@ .code32 .section .multiboot.text, "ax", @progbits -/* - * miscellaneous utility routines - */ - /* void _x86_write_tss_base(u64 *gdt_entry, struct x86_tss *tss) */ ENTRY(_x86_write_tss_base) movl 4(%esp), %edi diff --git a/arch/x86/include/arch/page.h b/arch/x86/include/arch/page.h index 98593b0..ebab061 100644 --- a/arch/x86/include/arch/page.h +++ b/arch/x86/include/arch/page.h @@ -68,8 +68,8 @@ static inline void vm_flush(void) { register_t tmp; __asm__ volatile( - " mov %%cr3, %0 \n" - " mov %0, %%cr3 \n" +" mov %%cr3, %0 \n" +" mov %0, %%cr3 \n" : "=r"(tmp) : : "memory" diff --git a/arch/x86/include/arch/string.h b/arch/x86/include/arch/string.h new file mode 100644 index 0000000..04057a1 --- /dev/null +++ b/arch/x86/include/arch/string.h @@ -0,0 +1,5 
@@ +/* Copyright (C) 2021,2022 fef . All rights reserved. */ + +#pragma once + +#include diff --git a/arch/x86/mm/amd64/init.c b/arch/x86/mm/amd64/init.c index 38ed1b4..4984c8b 100644 --- a/arch/x86/mm/amd64/init.c +++ b/arch/x86/mm/amd64/init.c @@ -27,14 +27,33 @@ static void register_area(struct mb2_mmap_entry *entry) vm_paddr_t end = start + entry->len; if (start >= DMA_LIMIT) { + /* + * --------------------- end + * MM_ZONE_NORMAL + * --------------------- start + * + * --------------------- DMA_LIMIT + */ __boot_register_mem_area(start, end, MM_ZONE_NORMAL); - } else if (start < DMA_LIMIT && end > DMA_LIMIT) { + } else if (end > DMA_LIMIT) { + /* + * ----------------- end + * MM_ZONE_NORMAL + * ----------------- DMA_LIMIT + * MM_ZONE_DMA + * ----------------- start + */ __boot_register_mem_area(start, DMA_LIMIT, MM_ZONE_DMA); __boot_register_mem_area(DMA_LIMIT, end, MM_ZONE_NORMAL); - } else if (start < DMA_LIMIT && end <= DMA_LIMIT) { - __boot_register_mem_area(start, end, MM_ZONE_DMA); } else { - panic("congratulations, you reached an unreachable branch"); + /* + * --------------------- DMA_LIMIT + * + * --------------------- end + * MM_ZONE_DMA + * --------------------- start + */ + __boot_register_mem_area(start, end, MM_ZONE_DMA); } } @@ -68,8 +87,8 @@ static void map_direct_area(vm_paddr_t end) for (int pdpti = 0; pdpti < 512; pdpti++) { x86_pdpte_t *pdpte = X86_PDPTE(vpos); - pdpte->val = ppos | __P_PRESENT | __P_RW | __P_GLOBAL - | __P_HUGE | __P_NOEXEC; + pdpte->val = ppos | __P_PRESENT | __P_RW | __P_NOCACHE | __P_WRITE_THROUGH + | __P_GLOBAL | __P_HUGE | __P_NOEXEC; ppos += GIGAPAGE_SIZE; vpos += GIGAPAGE_SIZE; @@ -129,7 +148,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL); panic_if(pml4te_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array"); __boot_clear_page(pml4te_val); - pml4te_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC; + pml4te_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; pml4te->val = pml4te_val; vm_flush(); @@ -145,8 +164,8 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) * that is not the case. I've checked the disassembly with -O2, * and clang is emitting the check. So it's fine, i guess. 
*/ if (pdpte_val != BOOT_PMALLOC_ERR) { - pdpte_val |= __P_PRESENT | __P_RW | __P_NOCACHE - | __P_HUGE | __P_GLOBAL | __P_NOEXEC; + pdpte_val |= __P_PRESENT | __P_RW | __P_HUGE + | __P_GLOBAL | __P_NOEXEC; pdpte->val = pdpte_val; map_pos += GIGAPAGE_SIZE; if (map_pos >= map_end) @@ -160,7 +179,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) panic_if(pdpte_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array"); __boot_clear_page(pdpte_val); - pdpte_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC; + pdpte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; pdpte->val = pdpte_val; vm_flush(); @@ -173,8 +192,8 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) if (map_end - map_pos >= HUGEPAGE_SIZE) { pdte_val = __boot_pmalloc(X86_PDT_SHIFT, MM_ZONE_NORMAL); if (pdte_val != BOOT_PMALLOC_ERR) { - pdte_val |= __P_PRESENT | __P_RW | __P_NOCACHE - | __P_GLOBAL | __P_HUGE | __P_NOEXEC; + pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL + | __P_HUGE | __P_NOEXEC; pdte->val = pdte_val; map_pos += HUGEPAGE_SIZE; if (map_pos >= map_end) @@ -188,8 +207,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) panic_if(pdte_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array"); __boot_clear_page(pdpte_val); - pdte_val |= __P_PRESENT | __P_RW | __P_NOCACHE - | __P_GLOBAL | __P_NOEXEC; + pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; pdte->val = pdte_val; vm_flush(); @@ -199,8 +217,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap) vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT, MM_ZONE_NORMAL); panic_if(pte_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array"); - pte_val |= __P_PRESENT | __P_RW | __P_NOCACHE - | __P_GLOBAL | __P_NOEXEC; + pte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; pte->val = pte_val; map_pos += PAGE_SIZE; @@ -228,8 +245,10 @@ void __boot_clear_page(vm_paddr_t paddr) vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT); vm_offset_t offset = paddr - pbase; void *vbase = (void *)KERNBASE - (1 << X86_PDPT_SHIFT); - x86_pdpte_t *pdpe = X86_PDPTE(vbase); - pdpe->val = pbase | __P_PRESENT | __P_RW | __P_NOCACHE | __P_HUGE | __P_NOEXEC; + x86_pdpte_t *pdpte = X86_PDPTE(vbase); + x86_pdpte_t old_pdpte = *pdpte; + old_pdpte.val = pdpte->val; + pdpte->val = pbase | __P_PRESENT | __P_RW | __P_NOCACHE | __P_HUGE | __P_NOEXEC; vm_flush(); memset64(vbase + offset, 0, PAGE_SIZE); pdpe->val = 0; diff --git a/arch/x86/mm/amd64/page.c b/arch/x86/mm/amd64/page.c index 7988e44..543458b 100644 --- a/arch/x86/mm/amd64/page.c +++ b/arch/x86/mm/amd64/page.c @@ -1,12 +1,18 @@ /* Copyright (C) 2021,2022 fef . All rights reserved. 
*/ +#include #include +#include #include #include #include +#include #include #include +#include + +#include /* * Initial Page Directory Pointer Table and Page Map Level 4 Table for the @@ -48,7 +54,10 @@ void x86_isr_page_fault(trap_frame_t *frame, u32 error_code) kprintf("\n########## B O N K ##########\n"); kprintf("Illegal %s %s%s address %p!\n", space, rwx, present, address); print_regs(frame); - panic("Page fault"); + /* print a stack trace if this came from kernel space */ + if (frame->hw_frame.cs == X86_64_KERN_CS) + ktrace_print_from((void *)frame->rbp); + panic_notrace("Page fault"); } vm_paddr_t vtophys(void *virt) @@ -79,3 +88,14 @@ vm_paddr_t vtophys(void *virt) vm_paddr_t phys_base = pte->val & X86_PMAP_MASK; return phys_base + ((vm_paddr_t)virt % (1 << X86_PT_SHIFT)); } + +void page_clear(vm_page_t page) +{ + register_t cpuflags = intr_disable(); + page_lock(page); + u64 *dest = DMAP_START + (pg2pfn(page) << PAGE_SHIFT); + usize nbyte = (usize)1 << (pga_order(page) + PAGE_SHIFT); + memset64(dest, 0, nbyte); + page_unlock(page); + intr_restore(cpuflags); +} diff --git a/arch/x86/mm/i386/page.c b/arch/x86/mm/i386/page.c index 258b970..f876fc9 100644 --- a/arch/x86/mm/i386/page.c +++ b/arch/x86/mm/i386/page.c @@ -9,6 +9,7 @@ * address `0xfffff000-0xffffffff`, then points to the page directory itself. */ +#include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include @@ -275,14 +277,12 @@ uintptr_t vtophys(void *virt) return phys; } -void vm_flush(void) +void page_clear(vm_page_t page) { - register_t tmp; - __asm__ volatile( -" mov %%cr3, %0 \n" -" mov %0, %%cr3 \n" - : "=r"(tmp) - : - : "memory" - ); + register_t cpuflags = intr_disable(); + page_lock(page); + u32 *dest = DMAP_START + (pg2pfn(page) << PAGE_SHIFT); + usize nbyte = (usize)1 << (pga_order(page) + PAGE_SHIFT); + memset32(dest, 0, nbyte); + page_unlock(page); } diff --git a/doc/amd64/memory.md b/doc/amd64/memory.md index 5f843f8..3784cac 100644 --- a/doc/amd64/memory.md +++ b/doc/amd64/memory.md @@ -31,20 +31,24 @@ It also kind of makes you appreciate the sheer vastness of 64-bit address space. Kernel space addresses start at `0xffff800000000000` because the MMU "only" supports 48-bit linear addresses. -The way i've understood it, the Intel spec says the 17 MSBs of virtual -addresses must be all the same, but other than that are ignored. +The way i've understood it, the Intel spec says bits 63:48 of virtual +addresses must be copies of bit 47, but other than that are ignored. So, as far as the MMU is concerned, the huge hole doesn't even exist: Userspace ranges from `0x000000000000~0x7fffffffffff`, and everything belonging to the kernel from `0x800000000000~0xffffffffffff` (note how the leading 0's/f's are missing, these are 48-bit values). The linear physical memory is a direct mapping of physical RAM, which is -required because `kmalloc()` needs to be able to allocate *physically* -contiguous memory for DMA transfers. +required because `kmalloc()` and friends need to be able to allocate +*physically* contiguous memory for DMA transfers and i don't have the energy +to update kernel page maps every time the kernel needs a new page. The kernel image itself is loaded into physical memory at `0x00400000` by default, and the entire low 2 GB of physical memory are statically mapped to the end of virtual memory (-2 GB). That way, we can use `-mcmodel=kernel`, which prevents the compiler from emitting raw address loads and absolute jumps (this is significantly faster). 
-All kernel code resides within the -2 GB region. \ No newline at end of file +All kernel code resides within the -2 GB region. + +The `vm_page_array`, which keeps track of what each individual page is used for, +starts directly at the beginning of the kernel area at -2 TB. diff --git a/include/gay/cdefs.h b/include/gay/cdefs.h index 13e7ac1..e75c529 100644 --- a/include/gay/cdefs.h +++ b/include/gay/cdefs.h @@ -88,6 +88,9 @@ /** @brief Mark the symbol as used, even if it really isn't. */ #define __used __attribute__(( used )) +/** @brief Tell the compiler that a struct member is intentionally unused. */ +#define __unused __attribute__(( unused )) + /** @brief Symbol may be silently redefined. */ #define __weak __attribute__(( weak )) diff --git a/include/gay/mm.h b/include/gay/mm.h index 4f02017..08feb8a 100644 --- a/include/gay/mm.h +++ b/include/gay/mm.h @@ -6,16 +6,19 @@ * @file include/gay/mm.h * @brief Header for dynamic memory management * - * To avoid possible confusion (and Not break 32-bit systems, even though they - * aren't really supported anyway), physical memory addresses always use type - * `vm_paddr_t` and virtual ones are `void *`. This should give us at least - * some type of compiler warning if they are accidentally mixed up. + * To avoid possible confusion (and Not break systems where virtual addresses + * are less wide than physical ones, like IA-32 with PAE), physical memory + * addresses always use type `vm_paddr_t` and virtual ones are `void *`. + * This should give us at least some type of compiler warning if they are + * accidentally mixed up. * * GayBSD uses a classic slab algorithm for its own data structures, which is * backed by a buddy page frame allocator. The latter is also used for getting * bigger areas of memory that are not physically contiguous (for regular user * allocations). The entire physical memory is mapped statically in the range - * `DMAP_START - DMAP_END`. + * `DMAP_START - DMAP_END` in order to make clearing pages without a specific + * mapping easier, even though regular code outside the mm subsystem should be + * completely oblivious to this fact. * * Memory is split up into (currently) two zones: `MM_ZONE_NORMAL` and * `MM_ZONE_DMA`. As their names suggest, the former is for general purpose @@ -23,6 +26,10 @@ * Zones are further divided into pools, each of which hold a list of groups of * free pages. The size of these page groups is determined by the pool's order, * where the pool of order `n` holds groups of `1 << n` pages. + * + * The mm subsystem needs to allocate memory for initializing itself. + * Therefore, there is an additional boot page frame allocator, which gets the + * free areas from architecture dependent code (`arch/mm/.../init.c`). */ #ifdef _KERNEL @@ -38,17 +45,24 @@ #include -#define _M_ZONE_NORMAL 0 -#define _M_ZONE_DMA 1 -#define _M_ZONE_INDEX(flags) ((flags) & 1) +#define _M_ZONE_DMA 0 +#define _M_ZONE_NORMAL 1 +/* we use 2 bits because there are likely gonna be additional zones in the future */ +#define _M_ZONE_INDEX(flags) ((flags) & 3) + +#define _M_EMERG (1 << 2) +#define _M_NOWAIT (1 << 3) -#define _M_EMERG (1 << 1) -#define _M_NOWAIT (1 << 2) +#ifndef _HAVE_VM_PAGE_T +#define _HAVE_VM_PAGE_T 1 +struct vm_page; +typedef struct vm_page *vm_page_t; +#endif enum mm_zone_type { - MM_ZONE_NORMAL = _M_ZONE_NORMAL, MM_ZONE_DMA = _M_ZONE_DMA, - MM_NR_ZONES + MM_ZONE_NORMAL = _M_ZONE_NORMAL, + MM_NR_ZONES = 2 }; /** @brief Boot memory area. 
*/ @@ -76,7 +90,7 @@ struct mm_zone { /** @brief Thresholds for OOM behavior */ struct { /** @brief Minimum number of pages reserved for emergency allocations */ - u_long emerg; + long emerg; } thrsh; struct mm_pool pools[MM_NR_ORDERS]; struct clist _bmem_areas; /* -> struct _bmem_area */ @@ -92,7 +106,9 @@ struct mm_zone { extern struct mm_zone mm_zones[MM_NR_ZONES]; /* kernel/mm/page.c */ /** - * @brief Memory allocation flags passed to `kmalloc()`. + * @brief Memory allocation flags commonly used by all allocators. + * All of them are eventually passed down to `page_alloc()`, the physical page + * frame allocator, */ enum mflags { /** @brief Use emergency memory reserves if necessary */ @@ -107,6 +123,9 @@ enum mflags { M_DMA = _M_ZONE_DMA, }; +/** @brief Initialize the slab allocator. */ +void kmalloc_init(void); + /** * @brief Allocate memory. * @@ -125,33 +144,6 @@ void *kmalloc(size_t size, enum mflags flags) __malloc_like __alloc_size(1); */ void kfree(void *ptr); -/** - * @brief Flags for the paging structures. - * - * The macros with two underscores in front of them are defined in `arch/page.h` - * and match the respective bit positions in the platform's native hardware - * layout for better performance (no shifting around required). - */ -enum pflags { - P_PRESENT = __P_PRESENT, /**< @brief Page exists */ - P_RW = __P_RW, /**< @brief Page is writable */ - P_USER = __P_USER, /**< @brief Page is accessible from ring 3 */ - P_ACCESSED = __P_ACCESSED, /**< @brief Page has been accessed */ - P_DIRTY = __P_DIRTY, /**< @brief Page has been written */ - P_GLOBAL = __P_GLOBAL, /**< @brief The entry survives `vm_flush()` */ - P_NOCACHE = __P_NOCACHE, /**< @brief The TLB won't cache this entry */ - P_SLAB = __P_SLAB, /**< @brief Page is used by the slab allocator */ - P_NOSLEEP = __P_ATOMIC, /**< @brief Page is atomic */ -#ifdef __HAVE_HUGEPAGES - /** @brief This page is `HUGEPAGE_SIZE` bytes long, rather than `PAGE_SIZE` */ - P_HUGE = __P_HUGE, -#endif -#ifdef __HAVE_NOEXEC - /** @brief No instructions can be fetched from this page */ - P_NOEXEC = __P_NOEXEC, -#endif -}; - /** * @brief Initialize the buddy page frame allocator. * This is only called once, from the arch dependent counterpart after it has @@ -161,11 +153,22 @@ enum pflags { void paging_init(vm_paddr_t phys_end); /** - * @brief Allocate a contiguous region in physical memory. + * @brief Allocate a physically contiguous region and initialize it with zeroes. * The returned region will be `(1 << order) * PAGE_SIZE` bytes long. * - * **The pages are not initialized.** - * If you want zeroed pages, use `get_zero_pages()`. + * @param order Order of magnitude (as in `1 << order` pages) + * @param flags How to allocate + * @return The page group that was allocated (evaluates false on failure) + */ +vm_page_t page_alloc(u_int order, enum mflags flags) __malloc_like; + +/** + * @brief Allocate and map a physically contiguous region in memory. + * The returned region will be `(1 << order) * PAGE_SIZE` bytes long, + * and initialized with zeroes. + * + * If filling the page with zeroes takes too much time, use `page_alloc()`. + * But only if you're careful and it's not an allocation for user space. * * @param order Order of magnitude (as in `1 << order` pages) * @param flags How to allocate @@ -173,12 +176,11 @@ void paging_init(vm_paddr_t phys_end); * or `nil` if the allocation failed */ void *get_pages(u_int order, enum mflags flags) __malloc_like; +/** @brief Alias for `get_pages(0, flags)`. 
*/ void *get_page(enum mflags flags) __malloc_like; -void *get_zero_pages(u_int order, enum mflags flags) __malloc_like; -void *get_zero_page(enum mflags flags) __malloc_like; -void free_pages(void *ptr); -#define free_page(ptr) free_pages(ptr) +/** @brief Free a page from `page_alloc()`. */ +void page_free(vm_page_t page); /** * @brief Initialize the slab caches. diff --git a/include/gay/vm/page.h b/include/gay/vm/page.h index d2eae11..297fea4 100644 --- a/include/gay/vm/page.h +++ b/include/gay/vm/page.h @@ -11,18 +11,6 @@ #include #include -/* - * I'm trying really hard to keep the size of struct vm_page a power of two - * on LP64 systems, because that way we can quickly get to the page frame number - * by shifting the byte offset of the vm_page_t in vm_page_array to the right - * rather than doing a costly divide instruction (or store the page frame number - * within the structure itself, which takes up precious space). - * - * There is insane pressure on the size of this structure, because a typical - * system will have millions of instances of it. Every additional byte makes - * a significant difference in memory management overhead. - */ - union vm_page_attr { int _val; struct { @@ -49,6 +37,9 @@ union vm_page_attr { typedef union vm_page_attr vm_page_attr_t; +/* defined in kernel/mm/slab.c */ +struct slab_pool; + /** * @brief Stores information about a single page in physical memory. * There is exactly one of these for every physical page, no matter what that @@ -59,18 +50,31 @@ struct vm_page { atom_t count; /** @brief Page attributes, use the macros below to access this */ atom_t attr; - /** @brief If the page is free, this is its freelist. */ - struct clist link; + /** @brief Page frame number */ + u_long pfn; /** - * @brief Optional extra data pointer, reserved for private use. - * The current owner of the page may use this to track the underlying - * object in memory (or pretty much anything else), for example the - * `struct slab` if this page is currently used by the slab allocator. + * @brief If the page is free, this is its freelist. + * If the page is used in the slab allocator, this is the list for the + * pool in which it currently resides. */ - void *extra; + struct clist link; + union { + struct { + void **freelist; + struct slab_pool *pool; + u_int entry_size; + u_int free_count; + } slab; + }; }; +#define INVALID_PAGE nil +#define SLAB(page) (&(page)->slab) + +#ifndef _HAVE_VM_PAGE_T +#define _HAVE_VM_PAGE_T 1 typedef struct vm_page *vm_page_t; +#endif /** @brief Array of every single page in physical memory, indexed by page frame number. */ extern struct vm_page *const vm_page_array; @@ -82,6 +86,9 @@ extern vm_page_t _vm_page_array_end; #define PGADDR_ASSERT(x) ({}) #endif +/** @brief Fill a page with zeroes (size depends on the current page order). 
*/ +void page_clear(vm_page_t page); + static inline u8 pga_order(vm_page_t page) { union vm_page_attr attr = { ._val = atom_read(&page->attr) }; @@ -211,7 +218,7 @@ __pure2 static inline u_long pg2pfn(vm_page_t page) { PGADDR_ASSERT(page < _vm_page_array_end); - return page - vm_page_array; + return page->pfn; } /** @@ -224,7 +231,8 @@ static inline vm_page_t vaddr2pg(void *vaddr) { PGADDR_ASSERT(vaddr >= DMAP_START && vaddr < (void *)_vm_page_array_end); uintptr_t offset = (uintptr_t)vaddr - DMAP_OFFSET; - return &vm_page_array[offset >> PAGE_SHIFT]; + struct vm_page *page = &vm_page_array[offset >> PAGE_SHIFT]; + return page - page->pfn % (1 << pga_order(page)); } /** @@ -254,7 +262,7 @@ static inline vm_page_t paddr2pg(vm_paddr_t paddr) { vm_page_t page = vm_page_array + (paddr >> PAGE_SHIFT); PGADDR_ASSERT(page < _vm_page_array_end); - return page; + return page - page->pfn % (1 << pga_order(page)); } /** @@ -267,19 +275,3 @@ static inline void *pfn2vaddr(u_long pfn) PGADDR_ASSERT(&vm_page_array[pfn] < _vm_page_array_end); return DMAP_START + (pfn << PAGE_SHIFT); } - -/* - * We have to be careful in this macro, because only the first page in the - * order group has the correct order set. So we can only read it once at - * the beginning of the loop, since the page pointer is being updated. - */ - -/** - * @brief Iterate over every page in its order group. - * - * @param page The first `vm_page_t` in the group. - */ -#define vm_page_foreach_in_order(page) \ - for (int __i = 1 << pga_order(page); \ - __i >= 0; \ - __i = ({ ++(page); --__i; })) diff --git a/kernel/mm/boot.c b/kernel/mm/boot.c index 37a07cd..8fa5aa5 100644 --- a/kernel/mm/boot.c +++ b/kernel/mm/boot.c @@ -8,7 +8,7 @@ #include -static struct _bmem_area _bmem_area_cache[16]; +static struct _bmem_area _bmem_area_cache[128]; static CLIST(bmem_area_freelist); #ifdef DEBUG @@ -37,6 +37,7 @@ static void free_bmem_area(struct _bmem_area *area) clist_add(&bmem_area_freelist, &area->link); } +/* insert an area when we already know there are no intersections with reserved memory */ static void insert_area_unsafe(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type) { KASSERT((start % PAGE_SIZE) == 0); diff --git a/kernel/mm/page.c b/kernel/mm/page.c index cd32105..d888ced 100644 --- a/kernel/mm/page.c +++ b/kernel/mm/page.c @@ -66,19 +66,18 @@ static inline u_int paddr_find_order(vm_paddr_t addr) } /** @brief Claim all free pages in one of the memory areas from the boot allocator. */ -static inline void claim_bmem_area(struct mm_zone *zone, struct _bmem_area *area) +static inline void claim_bmem_area(struct mm_zone *zone, const struct _bmem_area *area) { - vm_paddr_t start = area->start; - vm_paddr_t end = area->end; - vm_paddr_t pos = start; - vm_size_t nr_pages = end - start / PAGE_SIZE; - latom_add(&zone->free_count, (long)nr_pages); + u_int order = paddr_find_order(area->start); + while (area->start + ORDER_SIZE(order) > area->end) + order--; + + struct vm_page *const start = paddr2pg(area->start); + struct vm_page *const end = paddr2pg(area->end); + struct vm_page *pos = start; - struct vm_page *page = &vm_page_array[start >> PAGE_SHIFT]; - u_int order = paddr_find_order(start); - /* make sure the boot memory allocator cannot under any circumstances hand - * out pages from this area anymore, even though that should be unnecessary */ - clist_del(&area->link); + const vm_size_t nr_pages = end->pfn - start->pfn; + latom_add(&zone->free_count, (long)nr_pages); /* * We want to insert pages at the highest possible order. 
However, the @@ -90,15 +89,21 @@ static inline void claim_bmem_area(struct mm_zone *zone, struct _bmem_area *area * subsequently lower the order again. */ while (pos < end) { - struct mm_pool *pool = &zone->pools[order]; - clist_add(&pool->freelist, &page->link); + struct mm_pool *const pool = &zone->pools[order]; + clist_add(&pool->freelist, &pos->link); pool->free_entries++; /* only the first page in the order group is inserted into * the freelist, but all of them need to be initialized */ - for (u_int i = 0; i < (1 << order); i++) { - atom_init(&page[i].count, 0); - atom_init(&page[i].attr, 0); + for (u_int i = 0; i < (1u << order); i++) { + if (pos >= end) + panic("page %p out of range", pos); + if (atom_read(&pos->count) != 420) + panic("page %p double initialized\n", pos); + atom_init(&pos->count, 0); + atom_init(&pos->attr, 0); + + pos++; } /* @@ -111,22 +116,14 @@ static inline void claim_bmem_area(struct mm_zone *zone, struct _bmem_area *area * |---------------------|----> pos * start end */ - pos += ORDER_SIZE(order); - page += (1 << order); - if (order < MM_MAX_ORDER && pos + ORDER_SIZE(order) <= end) { + if (order < MM_MAX_ORDER && pos + (1 << (order + 1)) <= end) { /* this makes the rising part of the graph */ order++; - } else if (order > 0 && pos > end) { - /* we have overshot, lower the order */ - pos -= ORDER_SIZE(order); - page -= (1 << order); + } else if (order > 0 && pos + (1 << order) > end) { /* this makes the abrupt downwards jump at the end of the graph */ while (--order) { - if (pos + ORDER_SIZE(order) <= end) { - pos += ORDER_SIZE(order); - page += (1 << order); + if (pos + (1 << order) <= end) break; - } } } } @@ -141,7 +138,7 @@ void paging_init(vm_paddr_t phys_end) usize bitmap_total_size = 0; for (int order = 0; order < MM_NR_ORDERS; order++) { - usize pages = phys_end >> ORDER_SHIFT(order + 1); + usize pages = phys_end >> ORDER_SHIFT(order); pages = align_ceil(pages, LATOM_BIT * 2); usize bytes = pages / (CHAR_BIT * 2); bitmap_sizes[order] = bytes; @@ -158,7 +155,7 @@ void paging_init(vm_paddr_t phys_end) bitmap_size_log2--; /* the bit index returned by flsl starts at 1 */ if (bitmap_total_size ^ (1ul << bitmap_size_log2)) bitmap_size_log2++; /* bitmap_total_size is not a power of 2, round up */ - uintptr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL); + vm_paddr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL); panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR, "cannot allocate memory for the page bitmaps"); memset(__v(bitmap_start_phys), 0, bitmap_total_size); @@ -168,12 +165,15 @@ void paging_init(vm_paddr_t phys_end) */ for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) { struct mm_zone *zone = &mm_zones[zone_index]; + latom_init(&zone->free_count, 0); + /* we use the same bitmaps for all zones */ latom_t *bitmap_pos = __v(bitmap_start_phys); for (int order = 0; order < MM_NR_ORDERS; order++) { - zone->pools[order].bitmap = bitmap_pos; - clist_init(&zone->pools[order].freelist); - zone->pools[order].free_entries = 0; - latom_init(&zone->free_count, 0); + struct mm_pool *pool = &zone->pools[order]; + pool->bitmap = bitmap_pos; + pool->free_entries = 0; + clist_init(&pool->freelist); + spin_init(&pool->lock); bitmap_pos += bitmap_sizes[order]; } @@ -188,12 +188,13 @@ void paging_init(vm_paddr_t phys_end) * * if the reserved bit is set, all other fields in the page are invalid. 
*/ - for (usize i = 0; i < phys_end >> PAGE_SHIFT; i++) { + for (u_long pfn = 0; pfn < phys_end >> PAGE_SHIFT; pfn++) { /* This is merely an optimization to simplify checking whether * two buddies can be coalesced into one. In reality, the * reference count is invalid because the page is reserved. */ - atom_init(&vm_page_array[i].count, 1); - atom_init(&vm_page_array[i].attr, _PGA_RSVD_MASK); + atom_init(&vm_page_array[pfn].count, 420); + atom_init(&vm_page_array[pfn].attr, _PGA_RSVD_MASK); + vm_page_array[pfn].pfn = pfn; } /* @@ -203,11 +204,15 @@ void paging_init(vm_paddr_t phys_end) struct mm_zone *zone = &mm_zones[i]; struct _bmem_area *area, *tmp; clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) { + /* make sure the boot memory allocator cannot under any circumstances hand + * out pages from this area anymore, even though that should be unnecessary */ + clist_del(&area->link); + claim_bmem_area(zone, area); + zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM; + if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX) + zone->thrsh.emerg = CFG_PAGE_EMERG_MAX; } - zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM; - if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX) - zone->thrsh.emerg = CFG_PAGE_EMERG_MAX; } } @@ -218,22 +223,27 @@ static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order) return latom_flip_bit(bitmap, (int)(bit % LATOM_BIT)); } -__malloc_like -static void *__get_pages(u_int order, enum mflags flags) +vm_page_t page_alloc(u_int order, enum mflags flags) { - PAGE_ASSERT(order >= 0); - struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)]; - if (order > MM_MAX_ORDER) { page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags); return nil; } - u_long count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order); + struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)]; + long count_after; +try_next_zone: + count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order); if (count_after < zone->thrsh.emerg) { if (count_after < 0 || !(flags & _M_EMERG)) { latom_add(&zone->free_count, (1 << order)); - return nil; + /* if we can't allocate from ZONE_NORMAL, fall back to ZONE_DMA */ + if (zone > &mm_zones[0]) { + zone--; + goto try_next_zone; + } else { + return nil; + } } } @@ -266,93 +276,76 @@ static void *__get_pages(u_int order, enum mflags flags) intr_restore(cpuflags); } + if (page == nil) { + if (zone > &mm_zones[0]) { + /* + * If we reach this, the current zone technically had enough free + * pages for the allocation, but those pages were split up into + * smaller chunks rather than a contiguous area. However, we don't + * give up quite yet: If possible, we fall back to a lower memory + * zone (ZONE_NORMAL -> ZONE_DMA) and start over from the top. 
+ */ + zone--; + goto try_next_zone; + } else { + return nil; + } + } + /* * if we found a page, check if we need to split it up * (which is the case if we took one from a higher order freelist) */ - if (page != nil) { - usize pfn = pg2pfn(page); - page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n", - order, pfn, page_order); - pg_flip_bit(zone, pfn, page_order); - - /* split the page and insert the upper halves into the - * respective freelist until we reach the requested order */ - while (page_order-- > order) { - page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order); - struct mm_pool *pool = &zone->pools[page_order]; - vm_page_t buddy = page + (1 << page_order); - pga_set_order(buddy, page_order); - pg_flip_bit(zone, pfn + (1 << page_order), page_order); - - disable_intr(); - spin_lock(&pool->lock); - clist_add_first(&pool->freelist, &buddy->link); - pool->free_entries++; - spin_unlock(&pool->lock); - intr_restore(cpuflags); - } - - pga_set_order(page, order); - void *vaddr = pfn2vaddr(pfn); + usize pfn = pg2pfn(page); + page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n", + order, pfn, page_order); + pg_flip_bit(zone, pfn, page_order); + + /* split the page and insert the upper halves into the + * respective freelist until we reach the requested order */ + while (page_order-- > order) { + page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order); + struct mm_pool *pool = &zone->pools[page_order]; + vm_page_t buddy = page + (1 << page_order); + pga_set_order(buddy, page_order); + pg_flip_bit(zone, pfn + (1 << page_order), page_order); - return vaddr; - } else { - return nil; + disable_intr(); + spin_lock(&pool->lock); + clist_add_first(&pool->freelist, &buddy->link); + pool->free_entries++; + spin_unlock(&pool->lock); + intr_restore(cpuflags); } -} -/* faster memset for whole pages */ -static inline void init_pages(u_long *start, u_long val, u_int order) -{ - u_long *end = start + (ORDER_SIZE(order) / sizeof(*start)); - do { - *start++ = val; - } while (start != end); + for (u_int i = 0; i < (1 << order); i++) + pga_set_order(&page[i], order); + page_clear(page); + return page; } +/* + * XXX get_page() and get_pages() shouldn't depend on the direct map + * + * XXX Do we need these at all? I don't think so. 
+ */ + void *get_pages(u_int order, enum mflags flags) { - void *pages = __get_pages(order, flags); - -#if CFG_POISON_PAGES - if (pages != nil) - init_pages(pages, PAGE_POISON_ALLOC, order); -#endif - - return pages; + vm_page_t page = page_alloc(order, flags); + if (page) + return pfn2vaddr(pg2pfn(page)); + else + return nil; } void *get_page(enum mflags flags) { - void *pages = __get_pages(0, flags); - -#if CFG_POISON_PAGES - if (pages != nil) - init_pages(pages, PAGE_POISON_ALLOC, 0); -#endif - - return pages; -} - -void *get_zero_pages(u_int order, enum mflags flags) -{ - void *pages = __get_pages(order, flags); - - if (pages != nil) - init_pages(pages, 0, order); - - return pages; -} - -void *get_zero_page(enum mflags flags) -{ - void *page = __get_pages(0, flags); - - if (page != nil) - init_pages(page, 0, 0); - - return page; + vm_page_t page = page_alloc(0, flags); + if (page) + return pfn2vaddr(pg2pfn(page)); + else + return nil; } /* @@ -377,26 +370,13 @@ static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy) return merge; } -void free_pages(void *ptr) +void page_free(vm_page_t page) { - PAGE_DEBUG_BLOCK { - if (ptr < DMAP_START || ptr >= DMAP_END) { - panic("free_pages(%p): not in DMAP region\n", ptr); - } - } - register_t cpuflags = read_flags(); - vm_page_t page = vaddr2pg(ptr); - panic_if(pga_rsvd(page), "tried to free reserved page %p", ptr); - u_int order = pga_order(page); PAGE_ASSERT((uintptr_t)ptr % ORDER_SIZE(order) == 0); - u_long pfn = vaddr2pfn(ptr); - -#if CFG_POISON_PAGES - init_pages(ptr, PAGE_POISON_FREE, order); -#endif + u_long pfn = pg2pfn(page); PAGE_DEBUG_BLOCK { int old_count = atom_sub(&page->count, 1); @@ -407,6 +387,8 @@ void free_pages(void *ptr) page_debug("attempted to free %p with references", ptr); return; } + } else { + atom_dec(&page->count); } struct mm_zone *zone = &mm_zones[pga_zone(page)]; diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c index f4afc72..0bb95ca 100644 --- a/kernel/mm/slab.c +++ b/kernel/mm/slab.c @@ -21,7 +21,7 @@ #if CFG_POISON_SLABS struct slab_poison { - void *_pad; /**< @brief That's where the freelist pointer is stored */ + void *_pad __unused; /**< @brief That's where the freelist pointer is stored */ void *alloc_source; /**< @brief Code address that made the alloc call */ u_long exact_size; u_long low_poison; @@ -33,32 +33,6 @@ static void poison_after_alloc(struct slab_poison *poison, u_int exact_size, voi static void poison_after_free(struct slab_poison *poison); #endif -/** - * @brief This header sits at the beginning of each slab. - * The individual entries follow immediately after the struct itself. - */ -struct slab { - struct clist link; - void **freelist; - struct slab_pool *pool; - /** @brief For `link` */ - spin_t lock; - /** - * @brief Number of free entries. - * The slabs are sorted within their pool by this value, so that we - * always hand out entries from the fullest slabs (increases locality - * and thus decreases the stress on the TLB). - * - * This is intentionally not a `usize` because entry sizes are really - * small anyway (we currently refuse to allocate anything bigger than - * `PAGE_SIZE`), so this saves a couple of bytes on systems where `int` - * is smaller than `usize`. - */ - u_int free_entries; -}; - -#define SLAB_OVERHEAD (sizeof(struct slab)) - #if CFG_DEBUG_SLAB_ALLOCS # define slab_debug(msg, ...) 
kprintf("[slab] " msg, ##__VA_ARGS__) # define SLAB_DEBUG_BLOCK @@ -77,12 +51,12 @@ struct slab { struct slab_pool { const u_int entry_size; /**< @brief Size of one entry in bytes */ - const int entries_per_slab; /**< @brief Max number of entries per slab */ + const u_int entries_per_slab; /**< @brief Max number of entries per slab */ atom_t total_used; /**< @brief Total allocated entries */ const u_int page_order; /**< @brief Order passed to `get_pages()` */ - struct clist empty_list; /* -> struct slab::link */ - struct clist partial_list; /* -> struct slab::link */ - struct clist full_list; /* -> struct slab::link */ + struct clist empty_list; /* -> struct vm_page::link */ + struct clist partial_list; /* -> struct vm_page::link */ + struct clist full_list; /* -> struct vm_page::link */ spin_t empty_lock; /**< @brief Lock for `empty_list` */ spin_t partial_lock; /**< @brief Lock for `partial_list` */ spin_t full_lock; /**< @brief Lock for `full_list` */ @@ -98,12 +72,10 @@ struct slab_pool { * powers of two and perfectly aligned then. */ #define _MIN1(x) ((x) < 1 ? 1 : (x)) -#define POOL_ENTRY_SIZE(sz) (( (sz) - ( SLAB_OVERHEAD / _MIN1(PAGE_SIZE / (sz)) ) ) & ~0xfu) -#define POOL_ENTRIES_PER_TABLE(sz) \ - _MIN1((PAGE_SIZE - SLAB_OVERHEAD) / POOL_ENTRY_SIZE(sz)) +#define POOL_ENTRIES_PER_TABLE(sz) _MIN1(PAGE_SIZE / (sz)) #define POOL_DEFINE(sz) { \ - .entry_size = POOL_ENTRY_SIZE(sz), \ + .entry_size = (sz), \ .entries_per_slab = POOL_ENTRIES_PER_TABLE(sz), \ .total_used = ATOM_DEFINE(0), \ .page_order = ((sz) - 1) / PAGE_SIZE, \ @@ -127,7 +99,7 @@ static struct slab_pool slab_pools_normal[] = { POOL_DEFINE(8192), POOL_DEFINE(16384), POOL_DEFINE(32768), - { .entry_size = 0 } /* terminator */ + { /* terminator */ } }; static struct slab_pool slab_pools_dma[] = { POOL_DEFINE(32), @@ -136,16 +108,16 @@ static struct slab_pool slab_pools_dma[] = { POOL_DEFINE(256), POOL_DEFINE(512), POOL_DEFINE(1024), - { .entry_size = 0 } /* terminator */ + { /* terminator */ } }; #undef _MIN1 /* we don't wanna end up using this in actual code, do we? */ static struct slab_pool *slab_zone_pools[MM_NR_ZONES] = { - [_M_ZONE_NORMAL] = slab_pools_normal, [_M_ZONE_DMA] = slab_pools_dma, + [_M_ZONE_NORMAL] = slab_pools_normal, }; -static struct slab *slab_create(struct slab_pool *pool, enum mflags flags); +static vm_page_t slab_create(struct slab_pool *pool, enum mflags flags); void kmalloc_init(void) { @@ -206,31 +178,31 @@ void *kmalloc(usize size, enum mflags flags) * it can't possibly be used for allocations anymore. * This is probably not worth the overhead, though. 
*/ - struct slab *slab = nil; + vm_page_t page = INVALID_PAGE; /* try to use a slab that is already partially used first */ register_t cpuflags = intr_disable(); spin_lock(&pool->partial_lock); if (!clist_is_empty(&pool->partial_list)) { atom_dec(&pool->partial_count); - slab = clist_del_first_entry(&pool->partial_list, typeof(*slab), link); + page = clist_del_first_entry(&pool->partial_list, typeof(*page), link); } spin_unlock(&pool->partial_lock); - if (slab == nil) { + if (!page) { /* no partially used slab available, see if we have a completely free one */ spin_lock(&pool->empty_lock); if (!clist_is_empty(&pool->empty_list)) { atom_dec(&pool->empty_count); - slab = clist_del_first_entry(&pool->empty_list, typeof(*slab), link); + page = clist_del_first_entry(&pool->empty_list, typeof(*page), link); } spin_unlock(&pool->empty_lock); - if (slab == nil) { + if (!page) { /* we're completely out of usable slabs, allocate a new one */ intr_restore(cpuflags); - slab = slab_create(pool, flags); - if (slab == nil) { + page = slab_create(pool, flags); + if (!page) { slab_debug("kernel OOM\n"); return nil; } @@ -238,22 +210,22 @@ void *kmalloc(usize size, enum mflags flags) } } - /* if we've made it to here, slab != nil and interrupts are disabled */ - spin_lock(&slab->lock); - void *ret = slab->freelist; - slab->freelist = *slab->freelist; - if (--slab->free_entries == 0) { + /* if we've made it to here, we have a slab and interrupts are disabled */ + page_lock(page); + void *ret = page->slab.freelist; + SLAB(page)->freelist = *SLAB(page)->freelist; + if (--page->slab.free_count == 0) { spin_lock(&pool->full_lock); - clist_add(&pool->full_list, &slab->link); + clist_add(&pool->full_list, &page->link); spin_unlock(&pool->full_lock); atom_inc(&pool->full_count); } else { spin_lock(&pool->partial_lock); - clist_add(&pool->partial_list, &slab->link); + clist_add(&pool->partial_list, &page->link); spin_unlock(&pool->partial_lock); atom_inc(&pool->partial_count); } - spin_unlock(&slab->lock); + page_unlock(page); intr_restore(cpuflags); atom_inc(&pool->total_used); @@ -275,8 +247,7 @@ void kfree(void *ptr) vm_page_t page = vaddr2pg(ptr); SLAB_ASSERT(pga_slab(page)); - struct slab *slab = page->extra; - struct slab_pool *pool = slab->pool; + struct slab_pool *pool = SLAB(page)->pool; #if CFG_POISON_SLABS struct slab_poison *poison = container_of(ptr, typeof(*poison), data); poison_after_free(poison); @@ -284,63 +255,63 @@ void kfree(void *ptr) #endif register_t cpuflags = intr_disable(); - spin_lock(&slab->lock); - *(void **)ptr = slab->freelist; - slab->freelist = (void **)ptr; - if (++slab->free_entries == pool->entries_per_slab) { + page_lock(page); + *(void **)ptr = SLAB(page)->freelist; + SLAB(page)->freelist = (void **)ptr; + if (++SLAB(page)->free_count == pool->entries_per_slab) { spin_lock(&pool->partial_lock); - clist_del(&slab->link); + clist_del(&page->link); spin_unlock(&pool->partial_lock); atom_dec(&pool->partial_count); spin_lock(&pool->empty_lock); - clist_add(&pool->empty_list, &slab->link); + clist_add(&pool->empty_list, &page->link); spin_unlock(&pool->empty_lock); atom_inc(&pool->empty_count); } - spin_unlock(&slab->lock); + page_unlock(page); atom_dec(&pool->total_used); intr_restore(cpuflags); } -static struct slab *slab_create(struct slab_pool *pool, enum mflags flags) +static vm_page_t slab_create(struct slab_pool *pool, enum mflags flags) { slab_debug_noisy("Creating new cache for entry_size %u\n", pool->entry_size); - struct slab *slab = get_zero_pages(pool->page_order, 
flags); - - if (slab != nil) { - vm_page_t page = vaddr2pg(slab); - /* XXX it's probably sufficient to only do this for the lowest page */ - vm_page_foreach_in_order(page) { - pga_set_slab(page, true); - page->extra = slab; - } + vm_page_t page = page_alloc(pool->page_order, flags); - spin_init(&slab->lock); - slab->pool = pool; - slab->free_entries = pool->entries_per_slab; + if (page) { + pga_set_slab(page, true); + SLAB(page)->pool = pool; + SLAB(page)->free_count = pool->entries_per_slab; void *prev = nil; - void *end = (void *)slab + (1 << (pool->page_order + PAGE_SHIFT)); + /* XXX this should not rely on a direct map */ + void *start = pfn2vaddr(pg2pfn(page)); + void *end = start + (1 << (pool->page_order + PAGE_SHIFT)); void *pos = end; do { pos -= pool->entry_size; *(void **)pos = prev; prev = pos; - } while (pos >= (void *)&slab[1] + pool->entry_size); - slab->freelist = pos; + } while (pos > start); + SLAB(page)->freelist = pos; } - return slab; + return page; } #if CFG_POISON_SLABS + static inline void poison_after_alloc(struct slab_poison *poison, u_int exact_size, void *alloc_source) { u_int offset = align_ceil(poison->exact_size, sizeof(long)) / sizeof(long); u_long *poison_start = &poison->low_poison; - /* slabs are zeroed out when they are newly allocated */ + /* + * page_alloc() always initializes the allocated page to zeroes. + * Therefore, if exact_size is 0, we know this particular slab entry has + * never been used before, and we can skip the check. + */ if (poison->exact_size != 0) { for (u_long *pos = poison_start; pos < &poison->high_poison[offset]; pos++) { if (*pos != SLAB_POISON_FREE) { @@ -377,7 +348,12 @@ static inline void poison_after_free(struct slab_poison *poison) for (u_long *pos = &poison->low_poison; pos <= &poison->high_poison[offset]; pos++) *pos = SLAB_POISON_FREE; } -#endif + +#endif /* CFG_POISON_SLABS */ + +/* + * for certain libc routines + */ __weak void *malloc(usize size) { diff --git a/lib/c/include/string.h b/lib/c/include/string.h index 59c4b90..4b94b34 100644 --- a/lib/c/include/string.h +++ b/lib/c/include/string.h @@ -2,6 +2,8 @@ #pragma once +#include + #include #include @@ -71,6 +73,7 @@ void *memcpy(void *__restrict dest, const void *__restrict src, usize n); */ __pure int memcmp(const void *s1, const void *s2, usize n); +#ifndef __HAVE_ARCH_MEMSET /** * @brief Starting from `ptr`, fill `n` bytes with the constant byte `c`. * @@ -80,6 +83,28 @@ __pure int memcmp(const void *s1, const void *s2, usize n); * @returns A pointer to `ptr` */ void *memset(void *ptr, int c, usize n); +#endif + +#if _GAY_SOURCE >= 202109L +#ifndef __HAVE_ARCH_MEMSET16 +void *memset16(u16 *dest, u16 c, usize nbyte); +#endif +#ifndef __HAVE_ARCH_MEMSET32 +void *memset32(u32 *dest, u32 c, usize nbyte); +#endif +#ifndef __HAVE_ARCH_MEMSET64 +void *memset64(u64 *dest, u64 c, usize nbyte); +#endif + +#include +#if LONG_BIT == 32 +#define memsetl memset32 +#elif LONG_BIT == 64 +#define memsetl memset64 +#else +#error "Unsupported sizeof(long)" +#endif +#endif /** * @brief Copy a memory area.
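
For illustration, a minimal sketch of how a caller might use the refactored page
frame allocator described above; only functions and flags declared by this patch
(page_alloc(), page_free(), pg2pfn(), pfn2vaddr(), vaddr2pg(), M_DMA) are used,
while the header paths and the DMA-buffer context are assumptions rather than
part of the patch itself:

#include <gay/mm.h>      /* page_alloc(), page_free(), enum mflags */
#include <gay/vm/page.h> /* vm_page_t, pg2pfn(), pfn2vaddr(), vaddr2pg() */

static void *alloc_dma_buffer(void)
{
	/* two physically contiguous pages from MM_ZONE_DMA, zeroed by page_alloc() */
	vm_page_t page = page_alloc(1, M_DMA);
	if (!page) /* page_alloc() evaluates false on failure */
		return nil;

	/* callers that need a kernel virtual address go through the direct map */
	return pfn2vaddr(pg2pfn(page));
}

static void free_dma_buffer(void *buf)
{
	/* translate the direct-map address back to its vm_page and free the group */
	page_free(vaddr2pg(buf));
}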