diff --git a/arch/x86/boot/setup64.S b/arch/x86/boot/setup64.S
index 05f9f5f..e0e1928 100644
--- a/arch/x86/boot/setup64.S
+++ b/arch/x86/boot/setup64.S
@@ -42,8 +42,8 @@
 	.extern _boot	/* main boot routine -- see ./boot.c */
 
 	/* initial page maps -- see ../mm/amd64/page.c */
-	.extern _pml4
-	.extern _pdp0
+	.extern _pml4t
+	.extern _pdpt0
 
 	/* GDT stuff -- see ../mm/segment.S */
 	.extern _x86_gdt_desc
@@ -137,8 +137,8 @@ ENTRY(_setup)
 #endif
 
 #define V48 0xffff000000000000
-#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8)
-#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 )
+#define PDPT_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8)
+#define PML4T_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 )
 
 /*
  * statically map the low 2 GB to itself and to the high kernel half
@@ -150,15 +150,15 @@ ENTRY(_setup)
  * both low and high memory, so techincally this creates a total of four
  * mappings (+0 GB, +510 GB, -512 GB, -2 GB), but we remove all except
  * the -2GB one once we have transitioned to high memory. */
-	movl	$0x00000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE))
-	movl	$0x40000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE + 0x40000000))
+	movl	$0x00000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE))
+	movl	$0x40000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE + 0x40000000))
 
 	movl	$PADDR(_pdpt0 + 0x003), PADDR(_pml4t)	/* present (0), write (1), huge (7) */
-	movl	$PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4_OFFSET(KERNBASE))
+	movl	$PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4T_OFFSET(KERNBASE))
 
 	/* map the PML4 to itself */
-	movl	$PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET))
-	movb	$0x80, PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET) + 7)	/* NX bit */
+	movl	$PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET))
+	movb	$0x80, PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET) + 7)	/* NX bit */
 
 /*
  * ensure paging is disabled by clearing CR0.PG (bit 31)
@@ -192,7 +192,7 @@ ENTRY(_setup)
 
 /*
  * enable:
- *   CR0.PG (Paging, bit31)
+ *   CR0.PG (Paging, bit 31)
  *   CR0.WP (Write Protect, bit 16)
  */
 	movl	%cr0, %eax
diff --git a/arch/x86/include/amd64/page.h b/arch/x86/include/amd64/page.h
index 71a0c19..ef7f393 100644
--- a/arch/x86/include/amd64/page.h
+++ b/arch/x86/include/amd64/page.h
@@ -63,8 +63,7 @@
 /** @brief Binary logarithm of `HUGEPAGE_SIZE`. */
 #define HUGEPAGE_SHIFT X86_PDT_SHIFT
 /** @brief Binary logarithm of `GIGAPAGE_SIZE`. */
-#define GIGAPAGE_SHIFT
-#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT)
+#define GIGAPAGE_SHIFT X86_PDPT_SHIFT
 
 #ifndef _ASM_SOURCE
 
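A quick illustration of what the renamed offset macros compute: they locate the 8-byte entry covering a virtual address inside a 512-entry table. The C sketch below redoes the same arithmetic with fully parenthesized expressions; the shift values and the example KERNBASE are assumptions made here for illustration (the real constants live in the amd64 headers, not in this hunk), and the `% 512` is a no-op for canonical high-half addresses, which is why the assembly PML4T macro can omit it.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* illustrative values -- the real ones come from arch/x86/include/amd64/page.h */
#define X86_PDPT_SHIFT	30			/* one PDPT entry covers 1 GiB */
#define X86_PML4T_SHIFT	39			/* one PML4T entry covers 512 GiB */
#define V48		0xffff000000000000ull	/* sign-extension bits of a canonical address */

/* index of the table entry covering virtual address ptr; byte offset = index * 8 */
#define PDPT_INDEX(ptr)	 ((((ptr) - V48) >> X86_PDPT_SHIFT) % 512)
#define PML4T_INDEX(ptr) ((((ptr) - V48) >> X86_PML4T_SHIFT) % 512)

int main(void)
{
	uint64_t kernbase = 0xffffffff80000000ull;	/* assumed: kernel mapped at -2 GiB */

	printf("PDPT  index %" PRIu64 ", byte offset %#" PRIx64 "\n",
	       (uint64_t)PDPT_INDEX(kernbase), (uint64_t)(PDPT_INDEX(kernbase) * 8));
	printf("PML4T index %" PRIu64 ", byte offset %#" PRIx64 "\n",
	       (uint64_t)PML4T_INDEX(kernbase), (uint64_t)(PML4T_INDEX(kernbase) * 8));
	return 0;
}
```

For a kernel base of -2 GiB this prints PDPT index 510 (offset 0xff0) and PML4T index 511 (offset 0xff8), i.e. the second-to-last 1 GiB slot and the last PML4T slot.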
diff --git a/arch/x86/include/arch/dma.h b/arch/x86/include/arch/dma.h
new file mode 100644
index 0000000..6266db9
--- /dev/null
+++ b/arch/x86/include/arch/dma.h
@@ -0,0 +1,6 @@
+/* Copyright (C) 2021 fef . All rights reserved. */
+
+#pragma once
+
+/** @brief Maximum address for legacy DMA transfers */
+#define DMA_LIMIT (1 << 24)
diff --git a/arch/x86/include/arch/page.h b/arch/x86/include/arch/page.h
index 176c453..cab992a 100644
--- a/arch/x86/include/arch/page.h
+++ b/arch/x86/include/arch/page.h
@@ -23,6 +23,9 @@
 #endif
 #define HUGEPAGE_SIZE (1 << HUGEPAGE_SHIFT)
 
+#ifdef __HAVE_GIGAPAGES
+#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT)
+#endif
 
 #ifndef _ASM_SOURCE
 
@@ -38,6 +41,10 @@ void x86_paging_init(struct mb2_tag_mmap *mmap);
 
 #define PAGE_ALIGN(ptr)     ((typeof(ptr))( (uintptr_t)(ptr) & PAGE_MASK ))
 #define HUGEPAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & HUGEPAGE_MASK ))
+#ifdef __HAVE_GIGAPAGES
+#define GIGAPAGE_MASK ( ~((unsigned long)GIGAPAGE_SIZE - 1) )
+#define GIGAPAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & GIGAPAGE_MASK ))
+#endif
 
 /* page fault status code bits */
 #define X86_PF_PRESENT (1u << 0)
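Since the new `GIGAPAGE_MASK`/`GIGAPAGE_ALIGN` follow the exact pattern of the existing page and hugepage helpers, here is a minimal standalone sketch of the mask arithmetic, assuming `GIGAPAGE_SHIFT == 30` (i.e. `X86_PDPT_SHIFT` on amd64); the macros are re-declared locally for illustration only:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define GIGAPAGE_SHIFT	30				/* assumed: X86_PDPT_SHIFT */
#define GIGAPAGE_SIZE	(1ull << GIGAPAGE_SHIFT)	/* 1 GiB */
#define GIGAPAGE_MASK	(~(GIGAPAGE_SIZE - 1))		/* clears the low 30 bits */
#define GIGAPAGE_ALIGN(x) ((x) & GIGAPAGE_MASK)		/* round down to a 1 GiB boundary */

int main(void)
{
	uint64_t addr = 0x40123456ull;
	/* prints 0x40123456 -> 0x40000000 */
	printf("%#" PRIx64 " -> %#" PRIx64 "\n", addr, (uint64_t)GIGAPAGE_ALIGN(addr));
	return 0;
}
```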
diff --git a/arch/x86/mm/amd64/init.c b/arch/x86/mm/amd64/init.c
index 2540a1a..336d848 100644
--- a/arch/x86/mm/amd64/init.c
+++ b/arch/x86/mm/amd64/init.c
@@ -1,6 +1,7 @@
 /* Copyright (C) 2021 fef . All rights reserved. */
 
 #include
+#include
 #include
 #include
 
@@ -13,91 +14,39 @@
 #include
 #include
 
-/*
- * This file is funny.
- * Our job here seems simple at first glance: initialize the vm_page_array.
- * The catch is that we can't use the regular kernel memory allocators for
- * doing so, because those depend on vm_page_array. Classic chicken/egg stuff.
- * So, how do we allocate (and map!) memory for the array? Simple, by using a
- * completely separate page frame allocator that is so basic that it can't even
- * free pages again. That's not a problem though, because it doesn't need to.
- * Memory maps are created manually, which is very painful, but doable.
- * HOWEVER! This boot page frame allocator needs to allocate memory for keeping
- * track of which memory areas were already allocated and which ones are still
- * free, too. Areas might also have to be split, if the region we want to
- * allocate is not the exact size of the physical area. Therefore, we have
- * *another* allocator, which is basically the most primitive slab allocator in
- * existence. It uses a fixed-size "slab" (the `free_areas` array below), and
- * keeps track of which free areas are available.
- *
- * To sum up:
- * - The boot "slab" allocator hands out `struct free_area`s to ...
- * - the boot page frame allocator, which is used to set up ...
- * - the buddy page frame allocator, which serves as a backend to ...
- * - the kernel slab allocator.
- *
- * XXX the boot memory allocator could probably be moved to an architecture
- * independent file, because it is not really specific to the x86.
- */
-
 struct vm_page *const vm_page_array = (vm_page_t)VM_PAGE_ARRAY_OFFSET;
 #ifdef DEBUG
 /* this gets updated in x86_setup_paging() once we know how big the array is */
 vm_page_t _vm_page_array_end = (vm_page_t)(VM_PAGE_ARRAY_OFFSET + VM_PAGE_ARRAY_LENGTH);
 #endif
 
-/**
- * @brief Memory area information for the boot page frame allocator.
- * The multiboot bootloader gives us an array of memory areas, and tells us
- * which ones are available and which aren't. We insert all available areas
- * into a circular list (`free_area_list`), and the boot page frame allocator
- * iterates over that list for getting memory.
- *
- * Also, this is probably one of the most unfortunately named structures in the
- * entire system, because instances of this structure need to be allocated and,
- * well, freed.
- */
-struct free_area {
-	struct clist link;
-	vm_paddr_t start;
-	vm_size_t end;
-};
-/** @brief This is essentially a very basic slab. */
-static struct free_area free_areas[16];
-/** @brief List of all free memory areas, ordered by ascending address */
-static CLIST(free_area_list);
-/**
- * @brief List of all the unused members in `free_areas`.
- * This is essentially a very basic slab freelist.
- */
-static CLIST(free_area_freelist);
-
-/**
- * @brief VERY early page frame allocator.
- *
- * Allocates `1 << log2` bytes of memory, aligned to at least its own size.
- *
- * @param log2 Binary logarithm of the allocation size. Must be at least `PAGE_SHIFT`.
- * @returns Physical address of the allocated region, or `BOOT_PMALLOC_ERR` on failure
- */
-static vm_paddr_t __boot_pmalloc(u_int log2);
-#define BOOT_PMALLOC_ERR (~0ul)
-/** @brief Zero out a single page (required for page tables) */
-static void __boot_clear_page(vm_paddr_t paddr);
-
 /** @brief Initialize the members of `vm_page_array` within the given range. */
 static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags);
-/** @brief Add a new entry to the list of free memory areas. */
-static void insert_free_area(struct mb2_mmap_entry *entry);
-static void init_free_area_freelist(void);
 static void print_mem_area(struct mb2_mmap_entry *entry);
 
+static void register_area(struct mb2_mmap_entry *entry)
+{
+	vm_paddr_t start = entry->addr;
+	vm_paddr_t end = start + entry->len;
+
+	if (start >= DMA_LIMIT) {
+		__boot_register_mem_area(start, end, MM_ZONE_NORMAL);
+	} else if (start < DMA_LIMIT && end > DMA_LIMIT) {
+		__boot_register_mem_area(start, DMA_LIMIT, MM_ZONE_DMA);
+		__boot_register_mem_area(DMA_LIMIT, end, MM_ZONE_NORMAL);
+	} else if (start < DMA_LIMIT && end <= DMA_LIMIT) {
+		__boot_register_mem_area(start, end, MM_ZONE_DMA);
+	} else {
+		panic("congratulations, you reached an unreachable branch");
+	}
+}
+
 /*
  * "Oh cool another deeply nested 100-liner that nobody understands"
  */
 void x86_paging_init(struct mb2_tag_mmap *mmap)
 {
-	init_free_area_freelist();
+	__boot_pmalloc_init();
 
 	/*
 	 * insert all free areas and find the end of physical memory
@@ -110,7 +59,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
 		end = max(end, entry_end);
 		print_mem_area(entry);
 		if (entry->type == MB2_MEMORY_AVAILABLE)
-			insert_free_area(entry);
+			register_area(entry);
 
 		entry = (void *)entry + mmap->entry_size;
 	}
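The new `register_area()` helper splits every available multiboot region along the 16 MiB `DMA_LIMIT` boundary so the low part ends up in `MM_ZONE_DMA` and the rest in `MM_ZONE_NORMAL`. Below is a standalone sketch of that three-way case split with `__boot_register_mem_area()` replaced by a printf stub so it can run in isolation; the addresses, the stub, and the simplified condition chain (the redundant `start < DMA_LIMIT` checks and the unreachable `panic()` branch are left out) are illustrative only:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_paddr_t;

#define DMA_LIMIT (1 << 24)	/* 16 MiB, as defined in arch/dma.h */

enum mm_zone_type { MM_ZONE_NORMAL, MM_ZONE_DMA };

/* stand-in for __boot_register_mem_area() */
static void register_stub(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone)
{
	printf("  [%#010" PRIx64 ", %#010" PRIx64 ") -> %s\n", start, end,
	       zone == MM_ZONE_DMA ? "MM_ZONE_DMA" : "MM_ZONE_NORMAL");
}

static void register_area(vm_paddr_t start, vm_paddr_t end)
{
	if (start >= DMA_LIMIT) {
		register_stub(start, end, MM_ZONE_NORMAL);
	} else if (end > DMA_LIMIT) {
		/* straddles the boundary: split into a DMA part and a normal part */
		register_stub(start, DMA_LIMIT, MM_ZONE_DMA);
		register_stub(DMA_LIMIT, end, MM_ZONE_NORMAL);
	} else {
		register_stub(start, end, MM_ZONE_DMA);
	}
}

int main(void)
{
	puts("entirely above the DMA limit:");
	register_area(0x02000000, 0x08000000);
	puts("straddling the DMA limit:");
	register_area(0x00100000, 0x02000000);
	puts("entirely below the DMA limit:");
	register_area(0x00001000, 0x00100000);
	return 0;
}
```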
@@ -127,15 +76,23 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
 	remaining_size = align_ceil(remaining_size, PAGE_SIZE);
 	kprintf("Mapping %zu bytes for vm_page_array\n", remaining_size);
 
+	/* PML4T loop */
 	while (remaining_size != 0) {
+		/* Is vm_page_array so huge that it spans almost the entire 2 TB
+		 * kernel region? If that's the case, something has gone terribly
+		 * wrong, unless we somehow happen to have about an Exabyte of RAM
+		 * (which is not physically addressable by the CPU's 40-bit bus).
+		 */
+		KASSERT(map_pos < (void *)KERNBASE);
 		x86_pml4te_t *pml4te = X86_PML4TE(map_pos);
 		vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT);
-		KASSERT(pml4te_val != BOOT_PMALLOC_ERR);
+		panic_if(pml4te_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array");
 		__boot_clear_page(pml4te_val);
 		pml4te_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
 		pml4te->val = pml4te_val;
 		vm_flush();
 
+		/* PDPT loop */
 		for (int pdpt_index = 0; pdpt_index < 512; pdpt_index++) {
 			x86_pdpte_t *pdpte = X86_PDPTE(map_pos);
 			vm_paddr_t pdpte_val;
@@ -148,7 +105,7 @@
 			 * and clang is emitting the check. So it's fine, i guess. */
 			if (pdpte_val != BOOT_PMALLOC_ERR) {
 				pdpte_val |= __P_PRESENT | __P_RW | __P_HUGE
-					| __P_GLOBAL | __P_NOEXEC;
+					   | __P_GLOBAL | __P_NOEXEC;
 				pdpte->val = pdpte_val;
 				remaining_size -= 1 << X86_PDPT_SHIFT;
 				map_pos += 1 << X86_PDPT_SHIFT;
@@ -160,12 +117,14 @@
 
 			/* couldn't use a gigapage, continue in hugepage steps */
 			pdpte_val = __boot_pmalloc(PAGE_SHIFT);
-			KASSERT(pdpte_val != BOOT_PMALLOC_ERR);
+			panic_if(pdpte_val == BOOT_PMALLOC_ERR,
+				 "cannot reserve memory for vm_page_array");
 			__boot_clear_page(pdpte_val);
 			pdpte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
 			pdpte->val = pdpte_val;
 			vm_flush();
 
+			/* PDT loop */
 			for (int pdt_index = 0; pdt_index < 512; pdt_index++) {
 				x86_pdte_t *pdte = X86_PDTE(map_pos);
 				vm_paddr_t pdte_val;
@@ -175,7 +134,7 @@
 				pdte_val = __boot_pmalloc(X86_PDT_SHIFT);
 				if (pdte_val != BOOT_PMALLOC_ERR) {
 					pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL
-						| __P_HUGE | __P_NOEXEC;
+						    | __P_HUGE | __P_NOEXEC;
 					pdte->val = pdte_val;
 					remaining_size -= 1 << X86_PDT_SHIFT;
 					map_pos += 1 << X86_PDT_SHIFT;
@@ -187,16 +146,19 @@
 
 				/* couldn't use a hugepage, continue in page steps */
 				pdte_val = __boot_pmalloc(PAGE_SHIFT);
-				KASSERT(pdte_val != BOOT_PMALLOC_ERR);
+				panic_if(pdte_val == BOOT_PMALLOC_ERR,
+					 "cannot reserve memory for vm_page_array");
 				__boot_clear_page(pdpte_val);
 				pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
 				pdte->val = pdte_val;
 				vm_flush();
 
+				/* PT loop */
 				for (int pt_index = 0; pt_index < 512; pt_index++) {
 					x86_pte_t *pte = X86_PTE(map_pos);
 					vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT);
-					KASSERT(pte_val != BOOT_PMALLOC_ERR);
+					panic_if(pte_val == BOOT_PMALLOC_ERR,
+						 "cannot reserve memory for vm_page_array");
 					pte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
 					pte->val = pte_val;
 
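The nested loops above always try the largest mapping first: a 1 GiB gigapage at the PDPT level, then a 2 MiB hugepage at the PDT level, then plain 4 KiB pages. The standalone sketch below models only that size-selection order; the real code additionally falls back when `__boot_pmalloc()` cannot produce a naturally aligned physical block of the requested size, and of course writes actual page table entries, neither of which is modelled here. The shift values are the usual amd64 ones and are assumptions of this sketch:

```c
#include <stdint.h>
#include <stdio.h>

#define X86_PDPT_SHIFT	30	/* 1 GiB */
#define X86_PDT_SHIFT	21	/* 2 MiB */
#define X86_PT_SHIFT	12	/* 4 KiB */

/* pick the largest page size that still fits into the remaining length */
static unsigned int pick_shift(uint64_t remaining)
{
	if (remaining >= (1ull << X86_PDPT_SHIFT))
		return X86_PDPT_SHIFT;
	if (remaining >= (1ull << X86_PDT_SHIFT))
		return X86_PDT_SHIFT;
	return X86_PT_SHIFT;
}

int main(void)
{
	/* e.g. a page aligned vm_page_array of 1 GiB + 6 MiB + 12 KiB */
	uint64_t remaining = (1ull << 30) + (6ull << 20) + (12ull << 10);
	unsigned long giga = 0, huge = 0, small = 0;

	while (remaining != 0) {
		unsigned int shift = pick_shift(remaining);
		if (shift == X86_PDPT_SHIFT)
			giga++;
		else if (shift == X86_PDT_SHIFT)
			huge++;
		else
			small++;
		remaining -= 1ull << shift;
	}
	/* prints: 1 gigapage(s), 3 hugepage(s), 3 page(s) */
	printf("%lu gigapage(s), %lu hugepage(s), %lu page(s)\n", giga, huge, small);
	return 0;
}
```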
@@ -205,104 +167,12 @@
 					if (remaining_size == 0)
 						goto map_done;
 				} /* end of PT loop */
-			} /* end of PD loop */
-		} /* end of PDP loop */
-	} /* end of PML4 loop */
+			} /* end of PDT loop */
+		} /* end of PDPT loop */
+	} /* end of PML4T loop */
 
 map_done:
 	vm_flush();
-
-	/*
-	 * initialize the individual pages and calculate the usable RAM size
-	 */
-	vm_paddr_t prev_end = 0;
-	vm_size_t available_ram = 0;
-	struct free_area *cursor;
-	clist_foreach_entry(&free_area_list, cursor, link) {
-		/* list should have been ordered by ascending size */
-		KASSERT(cursor->start >= prev_end);
-
-		if (cursor->start != prev_end) {
-			vm_paddr_t reserved_start = prev_end;
-			vm_paddr_t reserved_end = cursor->start;
-			init_page_range(reserved_start, reserved_end, PG_RESERVED);
-		}
-
-		init_page_range(cursor->start, cursor->end, 0);
-		prev_end = cursor->end;
-		available_ram += cursor->end - cursor->start;
-	}
-
-	kprintf("Available RAM: %"PRIdVM_SIZE" bytes\n", available_ram);
-}
-
-static struct free_area *alloc_free_area_entry(void)
-{
-	/* XXX this should pretty much never happen, but it would still be nice to
-	 * have at least some sort of error recovery rather than giving up */
-	if (clist_is_empty(&free_area_freelist))
-		panic("Boot memory allocator has run out of free_areas");
-	return clist_del_first_entry(&free_area_freelist, struct free_area, link);
-}
-
-static void free_free_area_entry(struct free_area *area)
-{
-#ifdef DEBUG
-	area->start = ~0ul;
-	area->end = ~0ul;
-#endif
-	clist_add(&free_area_freelist, &area->link);
-}
-
-static void init_free_area_freelist(void)
-{
-	for (u_int i = 0; i < ARRAY_SIZE(free_areas); i++)
-		clist_add(&free_area_freelist, &free_areas[i].link);
-}
-
-static void insert_free_area(struct mb2_mmap_entry *entry)
-{
-	vm_paddr_t start = align_ceil(entry->addr, PAGE_SIZE);
-	vm_paddr_t end = align_floor(entry->addr + entry->len, PAGE_SIZE);
-	if (start <= image_start_phys && end >= image_end_phys) {
-		/*
-		 * This is the area that the kernel image is loaded in, which we need
-		 * to treat differently than all the others because it gets split up
-		 * into two usable areas. Illustration (addresses are examples only):
-		 *
-		 * 0x01000000 ---------------------- end (high_end)
-		 *     :
-		 * 0x00500000 ---------------------- image_end_phys (high_start)
-		 *     :
-		 * 0x00400000 ---------------------- image_start_phys (low_end)
-		 *     :
-		 * 0x00100000 ---------------------- start (low_start)
-		 *
-		 * (we silently assert that the image always spans only one region)
-		 */
-		vm_paddr_t low_start = start;
-		vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
-		if (low_start < low_end) {
-			struct free_area *area = alloc_free_area_entry();
-			area->start = low_start;
-			area->end = low_end;
-			clist_add(&free_area_list, &area->link);
-		}
-
-		vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
-		vm_paddr_t high_end = end;
-		if (high_start < high_end) {
-			struct free_area *area = alloc_free_area_entry();
-			area->start = high_start;
-			area->end = high_end;
-			clist_add(&free_area_list, &area->link);
-		}
-	} else {
-		struct free_area *area = alloc_free_area_entry();
-		area->start = start;
-		area->end = end;
-		clist_add(&free_area_list, &area->link);
-	}
-}
 
 static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags)
@@ -324,64 +194,6 @@
 	}
 }
 
-/*
- * This works relatively simple, actually.
- * We iterate over the list of `struct free_area`s in reverse order because the
- * list is sorted by ascending physical address and i've decided that we prefer
- * using higher physical addresses for the page array. The first fit wins, and
- * all that's left is to split up the area and insert the top and bottom
- * remainder back into the list, if applicable.
- */
-static vm_paddr_t __boot_pmalloc(u_int log2)
-{
-	const usize alloc_size = 1 << log2;
-	KASSERT(log2 >= PAGE_SHIFT); /* never hand out less than a full page */
-
-	struct free_area *cursor;
-	clist_foreach_entry_rev(&free_area_list, cursor, link) {
-		vm_paddr_t area_start = cursor->start;
-		vm_paddr_t area_end = cursor->end;
-		KASSERT(area_start < area_end);
-		/* the areas tend to be aligned to greater sizes at their beginning */
-		vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
-		vm_paddr_t alloc_end = alloc_start + alloc_size;
-
-		if (alloc_start >= area_start && alloc_end <= area_end) {
-			/*
-			 * Example with log2 == 21 (alloc_size == 0x00200000):
-			 *
-			 * 0x00500000 ------------------- area_end (not aligned)
-			 *     :
-			 * 0x00400000 ------------------- alloc_end (aligned to alloc_size)
-			 *     :
-			 * 0x00200000 ------------------- alloc_start (aligned to alloc_size)
-			 *     :
-			 * 0x00100000 ------------------- area_start (not aligned)
-			 */
-
-			if (alloc_start > area_start) {
-				struct free_area *low_rest = alloc_free_area_entry();
-				low_rest->start = area_start;
-				low_rest->end = alloc_start;
-				clist_add(&cursor->link, &low_rest->link);
-			}
-
-			if (alloc_end < area_end) {
-				struct free_area *high_rest = alloc_free_area_entry();
-				high_rest->start = alloc_end;
-				high_rest->end = area_end;
-				clist_add_first(&cursor->link, &high_rest->link);
-			}
-
-			clist_del(&cursor->link);
-			free_free_area_entry(cursor);
-			return alloc_start;
-		}
-	}
-
-	return BOOT_PMALLOC_ERR;
-}
-
 /*
  * It's really unfortunate that we have to zero a page before we can use it as
  * a page table, yet also need to reference it in the page table structures
  * This little hack temporarily maps the area at one PDP entry before KERNBASE
  * (meaning index 1022 of _pdp0), zeroes the area, and then unmaps it again.
  */
-static void __boot_clear_page(vm_paddr_t paddr)
+void __boot_clear_page(vm_paddr_t paddr)
 {
 	vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT);
 	vm_offset_t offset = paddr - pbase;
diff --git a/include/gay/cdefs.h b/include/gay/cdefs.h
index c8882ba..28e2061 100644
--- a/include/gay/cdefs.h
+++ b/include/gay/cdefs.h
@@ -9,15 +9,19 @@
 #include
 
 #ifdef __cplusplus
+#if defined(_KERNEL) && !defined(_CXX_KERNEL)
+#error "C++ cannot be used in kernel code. Define _CXX_KERNEL if you know what you're doing."
+#endif
+
 /** @brief Use `__restrict` in header files, and just `restrict` in C code */
 #define __restrict
 
 #define __BEGIN_DELCS extern "C" {
 #define __END_DECLS }
 
-#else
+#else /* not __cplusplus */
 
 #define __BEGIN_DECLS
 #define __END_DECLS
 
-#endif
+#endif /* __cplusplus */
 
 /** @brief Annotated symbol is an alias for another symbol. */
 #define __alias(name) __attribute__(( alias(#name) ))
diff --git a/include/gay/clist.h b/include/gay/clist.h
index 809a904..836025e 100644
--- a/include/gay/clist.h
+++ b/include/gay/clist.h
@@ -47,6 +47,7 @@ void clist_init(struct clist *list);
  * @param new New node to insert at the end
  */
 void clist_add(struct clist *head, struct clist *new);
+#define clist_insert_before(node, new) clist_add(node, new)
 
 /**
  * @brief Add a new node at the beginning of a clist.
@@ -55,6 +56,7 @@ void clist_add(struct clist *head, struct clist *new);
  * @param new New node to insert at the beginning
  */
 void clist_add_first(struct clist *head, struct clist *new);
+#define clist_insert_after(node, new) clist_add_first(node, new)
 
 /**
  * @brief Remove a node from a clist.
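The two new aliases exist so that call sites which insert relative to an arbitrary node, rather than at the head of a list, read naturally; `kernel/mm/boot.c` below uses `clist_insert_before()` to keep the per-zone area list sorted by ascending start address. A usage sketch in kernel context (`struct foo` and its key are made up for illustration; like the boot allocator, it relies on the cursor ending up at the head entry when the loop runs to completion, so the insert degenerates to an append):

```c
/* keep a list of struct foo sorted by ascending key */
struct foo {
	struct clist link;
	unsigned long key;
};

static void foo_insert_sorted(struct clist *list, struct foo *new)
{
	struct foo *cursor;

	clist_foreach_entry(list, cursor, link) {
		if (cursor->key > new->key)
			break;
	}
	clist_insert_before(&cursor->link, &new->link);
}
```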
diff --git a/include/gay/mm.h b/include/gay/mm.h
index 3e07e55..886e143 100644
--- a/include/gay/mm.h
+++ b/include/gay/mm.h
@@ -23,6 +23,7 @@
 #include
 #include
+#include
 #include
 #include
 
@@ -33,12 +34,22 @@
 #define _M_EMERG (1 << 1)
 #define _M_NOWAIT (1 << 2)
 
-#define MM_ZONE_NORMAL 0
-#define MM_ZONE_DMA 1
+enum mm_zone_type {
+	MM_ZONE_NORMAL = _M_ZONE_NORMAL,
+	MM_ZONE_DMA = _M_ZONE_DMA,
+	MM_NR_ZONES
+};
+
+struct _bmem_area {
+	struct clist link;	/* -> struct mm_zone::_bmem_areas */
+	vm_paddr_t start;
+	vm_paddr_t end;
+};
 
 struct mm_zone {
-	patom_t freelist;	/* -> struct page */
+	patom_t freelist;	/* -> struct vm_page */
 	usize length;
+	struct clist _bmem_areas;	/* -> struct _bmem_area */
 };
 
 /**
@@ -48,7 +59,7 @@ struct mm_zone {
  * The mm subsystem isn't NUMA aware, because it's not really a thing on desktop
  * grade machines anyway and would only complicate things unnecessarily.
  */
-extern struct mm_zone mm_zones[2];
+extern struct mm_zone mm_zones[MM_NR_ZONES];
 
 /**
  * @brief Memory allocation flags passed to `kmalloc()`.
@@ -262,4 +273,35 @@ static inline uintptr_t __p(void *virt)
 	return (uintptr_t)virt - DMAP_OFFSET;
 }
 
+/*
+ * Boot page frame allocator stuff, don't use these in regular code
+ */
+
+/** @brief Initialize the boot page frame allocator (called from `_paging_init()`) */
+void __boot_pmalloc_init(void);
+
+/**
+ * @brief Tell the boot page frame allocator about a free area in RAM.
+ * The area may overlap with the kernel image; this is checked automatically.
+ */
+void __boot_register_mem_area(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type);
+
+/**
+ * @brief Allocate a physical memory area.
+ *
+ * @param log2 Binary logarithm of the desired allocation size (must be `>= PAGE_SHIFT`)
+ * @param zone_type What zone to allocate from (you always want `MM_ZONE_NORMAL`)
+ * @return Allocated region (will be aligned to at least its own size),
+ *         or `BOOT_PMALLOC_ERR` if the request could not be satisfied either
+ *         due to OOM or because the alignment constraints failed
+ */
+vm_paddr_t __boot_pmalloc(u_int log2, enum mm_zone_type zone_type);
+#define BOOT_PMALLOC_ERR ((vm_paddr_t)0 - 1)
+
+/**
+ * @brief Zero out a single physical page.
+ * @param addr Physical address of the page in memory (must be page aligned, obviously)
+ */
+void __boot_clear_page(vm_paddr_t addr); /* implemented in arch dependent code */
+
 #endif /* _KERNEL */
diff --git a/include/gay/systm.h b/include/gay/systm.h
index 406940a..d4b91f9 100644
--- a/include/gay/systm.h
+++ b/include/gay/systm.h
@@ -15,6 +15,10 @@
  * @param fmt printf style format string
  */
 void panic(const char *fmt, ...) __noreturn __printflike(1, 2);
+#define panic_if(condition, msg, ...) do {	\
+	if (__predict_false(condition))		\
+		panic(msg, ##__VA_ARGS__);	\
+} while (0)
 
 void print_regs(const trap_frame_t *ctx);
 
diff --git a/kernel/mm/CMakeLists.txt b/kernel/mm/CMakeLists.txt
index 3905e97..3be2e46 100644
--- a/kernel/mm/CMakeLists.txt
+++ b/kernel/mm/CMakeLists.txt
@@ -1,6 +1,7 @@
 # Copyright (C) 2021 fef . All rights reserved.
 
 target_sources(gay_kernel PRIVATE
+	boot.c
 	kmalloc.c
 	page.c
 	slab.c
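Before the new allocator itself, a short sketch of how the interface declared in `include/gay/mm.h` above is meant to be consumed: register the available areas once, then pull naturally aligned blocks and zero them before use. The helper name and the panic message are made up; the calls follow the prototypes added in this patch (note that they take an explicit zone argument):

```c
/* hypothetical helper: allocate and zero one page table page from the normal zone */
static vm_paddr_t alloc_page_table(void)
{
	vm_paddr_t pt = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);

	panic_if(pt == BOOT_PMALLOC_ERR, "out of boot memory for page tables");
	__boot_clear_page(pt);	/* page tables must start out zeroed */
	return pt;
}
```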
diff --git a/kernel/mm/boot.c b/kernel/mm/boot.c
new file mode 100644
index 0000000..819fd31
--- /dev/null
+++ b/kernel/mm/boot.c
@@ -0,0 +1,183 @@
+/* Copyright (C) 2021 fef . All rights reserved. */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+static struct _bmem_area _bmem_area_cache[16];
+static CLIST(bmem_area_freelist);
+
+#ifdef DEBUG
+#define debug_free_bmem_area(area) ({ (area)->start = ~(vm_paddr_t)0; })
+#define debug_get_bmem_area(area) KASSERT((area)->start != ~(vm_paddr_t)0)
+#else
+#define debug_free_bmem_area(area) ({})
+#define debug_get_bmem_area(area) ({})
+#endif
+
+static struct _bmem_area *get_bmem_area(void)
+{
+	/* XXX this should pretty much never happen, but it would still be nice to
+	 * have at least some sort of error recovery rather than giving up */
+	if (clist_is_empty(&bmem_area_freelist))
+		panic("Boot memory allocator has run out of areas");
+
+	struct _bmem_area *area = clist_del_first_entry(&bmem_area_freelist, typeof(*area), link);
+	debug_get_bmem_area(area);
+	return area;
+}
+
+static void free_bmem_area(struct _bmem_area *area)
+{
+	debug_free_bmem_area(area);
+	clist_add(&bmem_area_freelist, &area->link);
+}
+
+static void insert_area_unsafe(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type)
+{
+	KASSERT((start % PAGE_SIZE) == 0);
+	KASSERT((end % PAGE_SIZE) == 0);
+
+	struct _bmem_area *area = get_bmem_area();
+	area->start = start;
+	area->end = end;
+
+	struct mm_zone *zone = &mm_zones[zone_type];
+	struct _bmem_area *cursor;
+	clist_foreach_entry(&zone->_bmem_areas, cursor, link) {
+		if (cursor->start > area->start)
+			break;
+	}
+	clist_insert_before(&cursor->link, &area->link);
+}
+
+void __boot_pmalloc_init(void)
+{
+	for (int i = 0; i < ARRAY_SIZE(_bmem_area_cache); i++) {
+		struct _bmem_area *area = &_bmem_area_cache[i];
+		debug_free_bmem_area(area);
+		clist_add(&bmem_area_freelist, &area->link);
+	}
+}
+
+void __boot_register_mem_area(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type)
+{
+	KASSERT(start < end);
+
+	start = align_ceil(start, PAGE_SIZE);
+	end = align_floor(end, PAGE_SIZE);
+	if (start == end)
+		return;
+
+	/* check for any overlaps with the kernel image and avoid those regions */
+	if (start <= image_start_phys && end >= image_end_phys) {
+		/*
+		 * 0x8000 ---------------------- end (-> high_end)
+		 * 0x7000
+		 * 0x6000 ---------------------- image_end_phys (-> high_start)
+		 * 0x5000
+		 * 0x4000 ---------------------- image_start_phys (-> low_end)
+		 * 0x3000
+		 * 0x2000 ---------------------- start (-> low_start)
+		 */
+		vm_paddr_t low_start = start;
+		vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
+		if (low_start < low_end)
+			insert_area_unsafe(low_start, low_end, zone_type);
+
+		vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
+		vm_paddr_t high_end = end;
+		if (high_start < high_end)
+			insert_area_unsafe(high_start, high_end, zone_type);
+	} else if (start >= image_start_phys && start <= image_end_phys) {
+		/*
+		 * 0x8000 ---------------------- end (-> high_end)
+		 * 0x7000
+		 * 0x6000 ---------------------- image_end_phys (-> high_start)
+		 * 0x5000
+		 * 0x4000 ---------------------- start
+		 * 0x3000
+		 * 0x2000 ---------------------- image_start_phys
+		 */
+		vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
+		vm_paddr_t high_end = end;
+		if (high_start < high_end)
+			insert_area_unsafe(high_start, high_end, zone_type);
+	} else if (end >= image_start_phys && end <= image_end_phys) {
+		/*
+		 * 0x8000 ---------------------- image_end_phys
+		 * 0x7000
+		 * 0x6000 ---------------------- end
+		 * 0x5000
+		 * 0x4000 ---------------------- image_start_phys (-> low_end)
+		 * 0x3000
+		 * 0x2000 ---------------------- start (-> low_start)
+		 */
+		vm_paddr_t low_start = start;
+		vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
+		if (low_start < low_end)
+			insert_area_unsafe(low_start, low_end, zone_type);
+	} else {
+		insert_area_unsafe(start, end, zone_type);
+	}
+}
+
+vm_paddr_t __boot_pmalloc(u_int log2, enum mm_zone_type zone_type)
+{
+	/* never hand out less than a full page */
+	KASSERT(log2 >= PAGE_SHIFT);
+	/* this might fail if someone accidentally gives us a size rather than shift */
+	KASSERT(log2 < sizeof(vm_paddr_t) * CHAR_BIT);
+
+	const vm_size_t alloc_size = (vm_size_t)1 << log2;
+	struct mm_zone *zone = &mm_zones[zone_type];
+
+	struct _bmem_area *cursor;
+	clist_foreach_entry_rev(&zone->_bmem_areas, cursor, link) {
+		vm_paddr_t area_start = cursor->start;
+		vm_paddr_t area_end = cursor->end;
+		KASSERT(area_start < area_end);
+
+		/* XXX we should really use a best-fit algorithm for this */
+		vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
+		vm_paddr_t alloc_end = alloc_start + alloc_size;
+
+		if (alloc_start >= area_start && alloc_end <= area_end) {
+			/*
+			 * Example with log2 == 14 (alloc_size == 0x4000):
+			 *
+			 * 0x9000 ------------------- area_end
+			 * 0x8000 ------------------- alloc_end (aligned to 0x4000)
+			 *    :
+			 * 0x4000 ------------------- alloc_start (aligned to 0x4000)
+			 * 0x3000
+			 * 0x2000 ------------------- area_start
+			 */
+
+			if (alloc_start > area_start) {
+				struct _bmem_area *low_rest = get_bmem_area();
+				low_rest->start = area_start;
+				low_rest->end = alloc_start;
+				clist_insert_before(&cursor->link, &low_rest->link);
+			}
+
+			if (alloc_end < area_end) {
+				struct _bmem_area *high_rest = get_bmem_area();
+				high_rest->start = alloc_end;
+				high_rest->end = area_end;
+				clist_insert_after(&cursor->link, &high_rest->link);
+			}
+
+			clist_del(&cursor->link);
+			free_bmem_area(cursor);
+			return alloc_start;
+		}
+	}
+
+	return BOOT_PMALLOC_ERR;
+}
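To close, a standalone walkthrough of the split arithmetic in `__boot_pmalloc()`: the allocation is carved out of the middle of a candidate area and the two remainders are re-inserted into the zone's list. The numbers match the example comment in the function above; `align_ceil()` is re-implemented locally so the sketch runs on its own:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_paddr_t;

/* local stand-in for the kernel's align_ceil() */
static vm_paddr_t align_ceil(vm_paddr_t x, vm_paddr_t align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	vm_paddr_t area_start = 0x2000, area_end = 0x9000;
	unsigned int log2 = 14;	/* alloc_size == 0x4000 */
	vm_paddr_t alloc_size = (vm_paddr_t)1 << log2;

	vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
	vm_paddr_t alloc_end = alloc_start + alloc_size;

	if (alloc_start >= area_start && alloc_end <= area_end) {
		printf("allocation:     [%#" PRIx64 ", %#" PRIx64 ")\n", alloc_start, alloc_end);
		if (alloc_start > area_start)	/* becomes the low_rest area */
			printf("low remainder:  [%#" PRIx64 ", %#" PRIx64 ")\n", area_start, alloc_start);
		if (alloc_end < area_end)	/* becomes the high_rest area */
			printf("high remainder: [%#" PRIx64 ", %#" PRIx64 ")\n", alloc_end, area_end);
	} else {
		puts("area cannot satisfy this allocation");
	}
	return 0;
}
```

This prints an allocation of [0x4000, 0x8000) with remainders [0x2000, 0x4000) and [0x8000, 0x9000), which is exactly the situation the two `get_bmem_area()` branches in the function above handle.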