mm: generalize boot allocator

The boot page frame allocator is now architecture independent.
This is part 2 of the mm subsystem refactor.
main
anna 3 years ago
parent 7285c2e076
commit d19e665d47
Signed by: fef
GPG Key ID: EC22E476DC2D3D84

@ -42,8 +42,8 @@
.extern _boot /* main boot routine -- see ./boot.c */
/* initial page maps -- see ../mm/amd64/page.c */
.extern _pml4
.extern _pdp0
.extern _pml4t
.extern _pdpt0
/* GDT stuff -- see ../mm/segment.S */
.extern _x86_gdt_desc
@ -137,8 +137,8 @@ ENTRY(_setup)
#endif
#define V48 0xffff000000000000
#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8)
#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 )
#define PDPT_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8)
#define PML4T_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 )
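/*
 * Worked example (illustrative; assumes the usual amd64 layout with
 * KERNBASE = 0xffffffff80000000, X86_PML4T_SHIFT = 39, X86_PDPT_SHIFT = 30):
 *   (KERNBASE - V48) >> 39 = 511          -> PML4T_OFFSET(KERNBASE) = 511 * 8 = 4088 (last PML4T entry)
 *   ((KERNBASE - V48) >> 30) % 512 = 510  -> PDPT_OFFSET(KERNBASE)  = 510 * 8 = 4080 (second to last PDPT entry)
 */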
/*
* statically map the low 2 GB to itself and to the high kernel half
@ -150,15 +150,15 @@ ENTRY(_setup)
* both low and high memory, so technically this creates a total of four
* mappings (+0 GB, +510 GB, -512 GB, -2 GB), but we remove all except
* the -2GB one once we have transitioned to high memory. */
movl $0x00000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE))
movl $0x40000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE + 0x40000000))
movl $0x00000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE))
movl $0x40000083, PADDR(_pdpt0 + PDPT_OFFSET(KERNBASE + 0x40000000))
movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t) /* present (0), write (1), huge (7) */
movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4_OFFSET(KERNBASE))
movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4T_OFFSET(KERNBASE))
/* map the PML4 to itself */
movl $PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET))
movb $0x80, PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */
movl $PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET))
movb $0x80, PADDR(_pml4t + PML4T_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */
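/*
 * Flag reference for the constants above (standard amd64 page table bits):
 *   0x00000083 / 0x40000083 = present (bit 0) | writable (bit 1) | huge (bit 7),
 *       i.e. two 1 GiB pages backed by physical 0x00000000 and 0x40000000.
 *   PADDR(_pdpt0) + 0x003   = present | writable, pointing the PML4T at _pdpt0.
 *   The recursive PML4T entry gets the same 0x003 flags plus NX (bit 63),
 *   which is what the movb into its top byte sets.
 */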
/*
* ensure paging is disabled by clearing CR0.PG (bit 31)
@ -192,7 +192,7 @@ ENTRY(_setup)
/*
* enable:
* CR0.PG (Paging, bit31)
* CR0.PG (Paging, bit 31)
* CR0.WP (Write Protect, bit 16)
*/
movl %cr0, %eax

@ -63,8 +63,7 @@
/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */
#define HUGEPAGE_SHIFT X86_PDT_SHIFT
/** @brief Binary logarithm of `GIGAPAGE_SIZE`. */
#define GIGAPAGE_SHIFT
#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT)
#define GIGAPAGE_SHIFT X86_PDPT_SHIFT
#ifndef _ASM_SOURCE

@ -0,0 +1,6 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#pragma once
/** @brief Maximum address for legacy DMA transfers */
#define DMA_LIMIT (1 << 24)
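/* 1 << 24 = 16 MiB, i.e. the highest address the 24-bit legacy ISA DMA controller can reach */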

@ -23,6 +23,9 @@
#endif
#define HUGEPAGE_SIZE (1 << HUGEPAGE_SHIFT)
#ifdef __HAVE_GIGAPAGES
#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT)
#endif
#ifndef _ASM_SOURCE
@ -38,6 +41,10 @@ void x86_paging_init(struct mb2_tag_mmap *mmap);
#define PAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & PAGE_MASK ))
#define HUGEPAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & HUGEPAGE_MASK ))
#ifdef __HAVE_GIGAPAGES
#define GIGAPAGE_MASK ( ~((unsigned long)GIGAPAGE_SIZE - 1) )
#define GIGAPAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & GIGAPAGE_MASK ))
#endif
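/*
 * Illustrative examples (assuming HUGEPAGE_SHIFT == 21 and GIGAPAGE_SHIFT == 30
 * as on amd64):
 *   HUGEPAGE_ALIGN(0x40345678) == 0x40200000
 *   GIGAPAGE_ALIGN(0x40345678) == 0x40000000
 */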
/* page fault status code bits */
#define X86_PF_PRESENT (1u << 0)

@ -1,6 +1,7 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <arch/atom.h>
#include <arch/dma.h>
#include <arch/multiboot.h>
#include <arch/vmparam.h>
@ -13,91 +14,39 @@
#include <inttypes.h>
#include <string.h>
/*
* This file is funny.
* Our job here seems simple at first glance: initialize the vm_page_array.
* The catch is that we can't use the regular kernel memory allocators for
* doing so, because those depend on vm_page_array. Classic chicken/egg stuff.
* So, how do we allocate (and map!) memory for the array? Simple, by using a
* completely separate page frame allocator that is so basic that it can't even
* free pages again. That's not a problem though, because it doesn't need to.
* Memory maps are created manually, which is very painful, but doable.
* HOWEVER! This boot page frame allocator needs to allocate memory for keeping
* track of which memory areas were already allocated and which ones are still
* free, too. Areas might also have to be split, if the region we want to
* allocate is not the exact size of the physical area. Therefore, we have
* *another* allocator, which is basically the most primitive slab allocator in
* existence. It uses a fixed-size "slab" (the `free_areas` array below), and
* keeps track of which free areas are available.
*
* To sum up:
* - The boot "slab" allocator hands out `struct free_area`s to ...
* - the boot page frame allocator, which is used to set up ...
* - the buddy page frame allocator, which serves as a backend to ...
* - the kernel slab allocator.
*
* XXX the boot memory allocator could probably be moved to an architecture
* independent file, because it is not really specific to the x86.
*/
struct vm_page *const vm_page_array = (vm_page_t)VM_PAGE_ARRAY_OFFSET;
#ifdef DEBUG
/* this gets updated in x86_setup_paging() once we know how big the array is */
vm_page_t _vm_page_array_end = (vm_page_t)(VM_PAGE_ARRAY_OFFSET + VM_PAGE_ARRAY_LENGTH);
#endif
/**
* @brief Memory area information for the boot page frame allocator.
* The multiboot bootloader gives us an array of memory areas, and tells us
* which ones are available and which aren't. We insert all available areas
* into a circular list (`free_area_list`), and the boot page frame allocator
* iterates over that list for getting memory.
*
* Also, this is probably one of the most unfortunately named structures in the
* entire system, because instances of this structure need to be allocated and,
* well, freed.
*/
struct free_area {
struct clist link;
vm_paddr_t start;
vm_size_t end;
};
/** @brief This is essentially a very basic slab. */
static struct free_area free_areas[16];
/** @brief List of all free memory areas, ordered by ascending address */
static CLIST(free_area_list);
/**
* @brief List of all the unused members in `free_areas`.
* This is essentially a very basic slab freelist.
*/
static CLIST(free_area_freelist);
/**
* @brief VERY early page frame allocator.
*
* Allocates `1 << log2` bytes of memory, aligned to at least its own size.
*
* @param log2 Binary logarithm of the allocation size. Must be at least `PAGE_SHIFT`.
* @returns Physical address of the allocated region, or `BOOT_PMALLOC_ERR` on failure
*/
static vm_paddr_t __boot_pmalloc(u_int log2);
#define BOOT_PMALLOC_ERR (~0ul)
/** @brief Zero out a single page (required for page tables) */
static void __boot_clear_page(vm_paddr_t paddr);
/** @brief Initialize the members of `vm_page_array` within the given range. */
static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags);
/** @brief Add a new entry to the list of free memory areas. */
static void insert_free_area(struct mb2_mmap_entry *entry);
static void init_free_area_freelist(void);
static void print_mem_area(struct mb2_mmap_entry *entry);
static void register_area(struct mb2_mmap_entry *entry)
{
vm_paddr_t start = entry->addr;
vm_paddr_t end = start + entry->len;
if (start >= DMA_LIMIT) {
__boot_register_mem_area(start, end, MM_ZONE_NORMAL);
} else if (start < DMA_LIMIT && end > DMA_LIMIT) {
__boot_register_mem_area(start, DMA_LIMIT, MM_ZONE_DMA);
__boot_register_mem_area(DMA_LIMIT, end, MM_ZONE_NORMAL);
} else if (start < DMA_LIMIT && end <= DMA_LIMIT) {
__boot_register_mem_area(start, end, MM_ZONE_DMA);
} else {
panic("congratulations, you reached an unreachable branch");
}
}
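/*
 * Worked example (addresses made up): an available area spanning
 * 0x00a00000-0x02000000 straddles DMA_LIMIT (0x01000000), so it gets split into
 * [0x00a00000, 0x01000000) -> MM_ZONE_DMA and [0x01000000, 0x02000000) -> MM_ZONE_NORMAL.
 */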
/*
* "Oh cool another deeply nested 100-liner that nobody understands"
*/
void x86_paging_init(struct mb2_tag_mmap *mmap)
{
init_free_area_freelist();
__boot_pmalloc_init();
/*
* insert all free areas and find the end of physical memory
@ -110,7 +59,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
end = max(end, entry_end);
print_mem_area(entry);
if (entry->type == MB2_MEMORY_AVAILABLE)
insert_free_area(entry);
register_area(entry);
entry = (void *)entry + mmap->entry_size;
}
@ -127,15 +76,23 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
remaining_size = align_ceil(remaining_size, PAGE_SIZE);
kprintf("Mapping %zu bytes for vm_page_array\n", remaining_size);
/* PML4T loop */
while (remaining_size != 0) {
/* Is vm_page_array so huge that it spans almost the entire 2 TB
* kernel region? If that's the case, something has gone terribly
* wrong, unless we somehow happen to have about an Exabyte of RAM
* (which is not physically addressable by the CPU's 40-bit bus). */
KASSERT(map_pos < (void *)KERNBASE);
x86_pml4te_t *pml4te = X86_PML4TE(map_pos);
vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pml4te_val != BOOT_PMALLOC_ERR);
panic_if(pml4te_val == BOOT_PMALLOC_ERR, "cannot reserve memory for vm_page_array");
__boot_clear_page(pml4te_val);
pml4te_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pml4te->val = pml4te_val;
vm_flush();
/* PDPT loop */
for (int pdpt_index = 0; pdpt_index < 512; pdpt_index++) {
x86_pdpte_t *pdpte = X86_PDPTE(map_pos);
vm_paddr_t pdpte_val;
@ -148,7 +105,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
* and clang is emitting the check. So it's fine, i guess. */
if (pdpte_val != BOOT_PMALLOC_ERR) {
pdpte_val |= __P_PRESENT | __P_RW | __P_HUGE
| __P_GLOBAL | __P_NOEXEC;
| __P_GLOBAL | __P_NOEXEC;
pdpte->val = pdpte_val;
remaining_size -= 1 << X86_PDPT_SHIFT;
map_pos += 1 << X86_PDPT_SHIFT;
@ -160,12 +117,14 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
/* couldn't use a gigapage, continue in hugepage steps */
pdpte_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pdpte_val != BOOT_PMALLOC_ERR);
panic_if(pdpte_val == BOOT_PMALLOC_ERR,
"cannot reserve memory for vm_page_array");
__boot_clear_page(pdpte_val);
pdpte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pdpte->val = pdpte_val;
vm_flush();
/* PDT loop */
for (int pdt_index = 0; pdt_index < 512; pdt_index++) {
x86_pdte_t *pdte = X86_PDTE(map_pos);
vm_paddr_t pdte_val;
@ -175,7 +134,7 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
pdte_val = __boot_pmalloc(X86_PDT_SHIFT);
if (pdte_val != BOOT_PMALLOC_ERR) {
pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL
| __P_HUGE | __P_NOEXEC;
| __P_HUGE | __P_NOEXEC;
pdte->val = pdte_val;
remaining_size -= 1 << X86_PDT_SHIFT;
map_pos += 1 << X86_PDT_SHIFT;
@ -187,16 +146,19 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
/* couldn't use a hugepage, continue in page steps */
pdte_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pdte_val != BOOT_PMALLOC_ERR);
panic_if(pdte_val == BOOT_PMALLOC_ERR,
"cannot reserve memory for vm_page_array");
__boot_clear_page(pdte_val); /* zero the freshly allocated page table */
pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pdte->val = pdte_val;
vm_flush();
/* PT loop */
for (int pt_index = 0; pt_index < 512; pt_index++) {
x86_pte_t *pte = X86_PTE(map_pos);
vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT);
KASSERT(pte_val != BOOT_PMALLOC_ERR);
panic_if(pte_val == BOOT_PMALLOC_ERR,
"cannot reserve memory for vm_page_array");
pte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pte->val = pte_val;
@ -205,104 +167,12 @@ void x86_paging_init(struct mb2_tag_mmap *mmap)
if (remaining_size == 0)
goto map_done;
} /* end of PT loop */
} /* end of PD loop */
} /* end of PDP loop */
} /* end of PML4 loop */
} /* end of PDT loop */
} /* end of PDPT loop */
} /* end of PML4T loop */
map_done:
vm_flush();
/*
* initialize the individual pages and calculate the usable RAM size
*/
vm_paddr_t prev_end = 0;
vm_size_t available_ram = 0;
struct free_area *cursor;
clist_foreach_entry(&free_area_list, cursor, link) {
/* list should have been ordered by ascending size */
KASSERT(cursor->start >= prev_end);
if (cursor->start != prev_end) {
vm_paddr_t reserved_start = prev_end;
vm_paddr_t reserved_end = cursor->start;
init_page_range(reserved_start, reserved_end, PG_RESERVED);
}
init_page_range(cursor->start, cursor->end, 0);
prev_end = cursor->end;
available_ram += cursor->end - cursor->start;
}
kprintf("Available RAM: %"PRIdVM_SIZE" bytes\n", available_ram);
}
static struct free_area *alloc_free_area_entry(void)
{
/* XXX this should pretty much never happen, but it would still be nice to
* have at least some sort of error recovery rather than giving up */
if (clist_is_empty(&free_area_freelist))
panic("Boot memory allocator has run out of free_areas");
return clist_del_first_entry(&free_area_freelist, struct free_area, link);
}
static void free_free_area_entry(struct free_area *area)
{
#ifdef DEBUG
area->start = ~0ul;
area->end = ~0ul;
#endif
clist_add(&free_area_freelist, &area->link);
}
static void init_free_area_freelist(void)
{
for (u_int i = 0; i < ARRAY_SIZE(free_areas); i++)
clist_add(&free_area_freelist, &free_areas[i].link);
}
static void insert_free_area(struct mb2_mmap_entry *entry)
{
vm_paddr_t start = align_ceil(entry->addr, PAGE_SIZE);
vm_paddr_t end = align_floor(entry->addr + entry->len, PAGE_SIZE);
if (start <= image_start_phys && end >= image_end_phys) {
/*
* This is the area that the kernel image is loaded in, which we need
* to treat differently than all the others because it gets split up
* into two usable areas. Illustration (addresses are examples only):
*
* 0x01000000 ---------------------- end (high_end)
* : <free real estate>
* 0x00500000 ---------------------- image_end_phys (high_start)
* : <kernel code & data>
* 0x00400000 ---------------------- image_start_phys (low_end)
* : <free real estate>
* 0x00100000 ---------------------- start (low_start)
*
* (we silently assert that the image always spans only one region)
*/
vm_paddr_t low_start = start;
vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
if (low_start < low_end) {
struct free_area *area = alloc_free_area_entry();
area->start = low_start;
area->end = low_end;
clist_add(&free_area_list, &area->link);
}
vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
vm_paddr_t high_end = end;
if (high_start < high_end) {
struct free_area *area = alloc_free_area_entry();
area->start = high_start;
area->end = high_end;
clist_add(&free_area_list, &area->link);
}
} else {
struct free_area *area = alloc_free_area_entry();
area->start = start;
area->end = end;
clist_add(&free_area_list, &area->link);
}
}
static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags)
@ -324,64 +194,6 @@ static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags)
}
}
/*
* This works relatively simple, actually.
* We iterate over the list of `struct free_area`s in reverse order because the
* list is sorted by ascending physical address and i've decided that we prefer
* using higher physical addresses for the page array. The first fit wins, and
* all that's left is to split up the area and insert the top and bottom
* remainder back into the list, if applicable.
*/
static vm_paddr_t __boot_pmalloc(u_int log2)
{
const usize alloc_size = 1 << log2;
KASSERT(log2 >= PAGE_SHIFT); /* never hand out less than a full page */
struct free_area *cursor;
clist_foreach_entry_rev(&free_area_list, cursor, link) {
vm_paddr_t area_start = cursor->start;
vm_paddr_t area_end = cursor->end;
KASSERT(area_start < area_end);
/* the areas tend to be aligned to greater sizes at their beginning */
vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
vm_paddr_t alloc_end = alloc_start + alloc_size;
if (alloc_start >= area_start && alloc_end <= area_end) {
/*
* Example with log2 == 21 (alloc_size == 0x00200000):
*
* 0x00500000 ------------------- area_end (not aligned)
* : <high_rest>
* 0x00400000 ------------------- alloc_end (aligned to alloc_size)
* : <allocated block>
* 0x00200000 ------------------- alloc_start (aligned to alloc_size)
* : <low_rest>
* 0x00100000 ------------------- area_start (not aligned)
*/
if (alloc_start > area_start) {
struct free_area *low_rest = alloc_free_area_entry();
low_rest->start = area_start;
low_rest->end = alloc_start;
clist_add(&cursor->link, &low_rest->link);
}
if (alloc_end < area_end) {
struct free_area *high_rest = alloc_free_area_entry();
high_rest->start = alloc_end;
high_rest->end = area_end;
clist_add_first(&cursor->link, &high_rest->link);
}
clist_del(&cursor->link);
free_free_area_entry(cursor);
return alloc_start;
}
}
return BOOT_PMALLOC_ERR;
}
/*
* It's really unfortunate that we have to zero a page before we can use it as
* a page table, yet also need to reference it in the page table structures
@ -389,7 +201,7 @@ static vm_paddr_t __boot_pmalloc(u_int log2)
* This little hack temporarily maps the area at one PDP entry before KERNBASE
* (meaning index 1022 of _pdpt0), zeroes the area, and then unmaps it again.
*/
static void __boot_clear_page(vm_paddr_t paddr)
void __boot_clear_page(vm_paddr_t paddr)
{
vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT);
vm_offset_t offset = paddr - pbase;

@ -9,15 +9,19 @@
#include <gay/_null.h>
#ifdef __cplusplus
#if defined(_KERNEL) && !defined(_CXX_KERNEL)
#error "C++ cannot be used in kernel code. Define _CXX_KERNEL if you know what you're doing."
#endif
/** @brief Use `__restrict` in header files, and just `restrict` in C code */
#define __restrict
#define __BEGIN_DECLS extern "C" {
#define __END_DECLS }
#else
#else /* not __cplusplus */
#define __BEGIN_DECLS
#define __END_DECLS
#endif
#endif /* __cplusplus */
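/*
 * Usage sketch (hypothetical prototype): headers that may be included from C++
 * simply wrap their declarations:
 *
 *   __BEGIN_DECLS
 *   void some_function(int arg);
 *   __END_DECLS
 */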
/** @brief Annotated symbol is an alias for another symbol. */
#define __alias(name) __attribute__(( alias(#name) ))

@ -47,6 +47,7 @@ void clist_init(struct clist *list);
* @param new New node to insert at the end
*/
void clist_add(struct clist *head, struct clist *new);
#define clist_insert_before(node, new) clist_add(node, new)
/**
* @brief Add a new node at the beginning of a clist.
@ -55,6 +56,7 @@ void clist_add(struct clist *head, struct clist *new);
* @param new New node to insert at the beginning
*/
void clist_add_first(struct clist *head, struct clist *new);
#define clist_insert_after(node, new) clist_add_first(node, new)
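/*
 * Note on the two aliases above: because a clist is circular, appending to a
 * list headed at `node` places the new entry immediately before `node`, and
 * prepending places it immediately after; that is exactly what the
 * insert_before/insert_after names express when `node` is an ordinary member
 * rather than the list head.
 */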
/**
* @brief Remove a node from a clist.

@ -23,6 +23,7 @@
#include <arch/page.h>
#include <gay/cdefs.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/types.h>
@ -33,12 +34,22 @@
#define _M_EMERG (1 << 1)
#define _M_NOWAIT (1 << 2)
#define MM_ZONE_NORMAL 0
#define MM_ZONE_DMA 1
enum mm_zone_type {
MM_ZONE_NORMAL = _M_ZONE_NORMAL,
MM_ZONE_DMA = _M_ZONE_DMA,
MM_NR_ZONES
};
struct _bmem_area {
struct clist link; /* -> struct mm_zone::_bmem_areas */
vm_paddr_t start;
vm_paddr_t end;
};
struct mm_zone {
patom_t freelist; /* -> struct page */
patom_t freelist; /* -> struct vm_page */
usize length;
struct clist _bmem_areas; /* -> struct _bmem_area */
};
/**
@ -48,7 +59,7 @@ struct mm_zone {
* The mm subsystem isn't NUMA aware, because it's not really a thing on desktop
* grade machines anyway and would only complicate things unnecessarily.
*/
extern struct mm_zone mm_zones[2];
extern struct mm_zone mm_zones[MM_NR_ZONES];
/**
* @brief Memory allocation flags passed to `kmalloc()`.
@ -262,4 +273,35 @@ static inline uintptr_t __p(void *virt)
return (uintptr_t)virt - DMAP_OFFSET;
}
/*
* Boot page frame allocator stuff, don't use these in regular code
*/
/** @brief Initialize the boot page frame allocator (called from `<arch>_paging_init()`) */
void __boot_pmalloc_init(void);
/**
* @brief Tell the boot page frame allocator about a free area in RAM.
* The area may overlap with the kernel image; this is checked automatically.
*/
void __boot_register_mem_area(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type);
/**
* @brief Allocate a physical memory area.
*
* @param log2 Binary logarithm of the desired allocation size (must be `>= PAGE_SHIFT`)
* @param zone_type What zone to allocate from (you always want `MM_ZONE_NORMAL`)
* @return Allocated region (will be aligned to at least its own size),
* or `BOOT_PMALLOC_ERR` if the request could not be satisfied either
* due to OOM or because the alignment constraints failed
*/
vm_paddr_t __boot_pmalloc(u_int log2, enum mm_zone_type zone_type);
#define BOOT_PMALLOC_ERR ((vm_paddr_t)0 - 1)
/**
* @brief Zero out a single physical page.
* @param addr Physical address of the page in memory (must be page aligned, obviously)
*/
void __boot_clear_page(vm_paddr_t addr); /* implemented in arch dependent code */
#endif /* _KERNEL */
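/*
 * Illustrative sketch, not part of the actual interface: roughly the call
 * sequence an architecture's paging init is expected to follow (mirroring
 * x86_paging_init() in this commit). The physical addresses are made up;
 * everything else uses the declarations above.
 */
static void __boot_pmalloc_example(void)
{
	__boot_pmalloc_init();

	/* hand every free area reported by the bootloader to the allocator;
	 * overlaps with the kernel image are carved out automatically */
	__boot_register_mem_area(0x00100000, 0x00ef0000, MM_ZONE_DMA);
	__boot_register_mem_area(0x01000000, 0x7ffe0000, MM_ZONE_NORMAL);

	/* grab one page (aligned to at least its own size) and zero it, e.g. for a page table */
	vm_paddr_t pt = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);
	panic_if(pt == BOOT_PMALLOC_ERR, "cannot reserve memory for page tables");
	__boot_clear_page(pt);
}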

@ -15,6 +15,10 @@
* @param fmt printf style format string
*/
void panic(const char *fmt, ...) __noreturn __printflike(1, 2);
#define panic_if(condition, msg, ...) do { \
if (__predict_false(condition)) \
panic(msg, ##__VA_ARGS__); \
} while (0)
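/* usage sketch (hypothetical condition and message): panic_if(ptr == NULL, "out of memory in %s", __func__); */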
void print_regs(const trap_frame_t *ctx);

@ -1,6 +1,7 @@
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
target_sources(gay_kernel PRIVATE
boot.c
kmalloc.c
page.c
slab.c

@ -0,0 +1,183 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <gay/clist.h>
#include <gay/linker.h>
#include <gay/mm.h>
#include <gay/systm.h>
#include <gay/util.h>
#include <limits.h>
static struct _bmem_area _bmem_area_cache[16];
static CLIST(bmem_area_freelist);
#ifdef DEBUG
#define debug_free_bmem_area(area) ({ (area)->start = ~(vm_paddr_t)0; })
#define debug_get_bmem_area(area) KASSERT((area)->start == ~(vm_paddr_t)0) /* poison must still be intact */
#else
#define debug_free_bmem_area(area) ({})
#define debug_get_bmem_area(area) ({})
#endif
static struct _bmem_area *get_bmem_area(void)
{
/* XXX this should pretty much never happen, but it would still be nice to
* have at least some sort of error recovery rather than giving up */
if (clist_is_empty(&bmem_area_freelist))
panic("Boot memory allocator has run out of areas");
struct _bmem_area *area = clist_del_first_entry(&bmem_area_freelist, typeof(*area), link);
debug_get_bmem_area(area);
return area;
}
static void free_bmem_area(struct _bmem_area *area)
{
debug_free_bmem_area(area);
clist_add(&bmem_area_freelist, &area->link);
}
static void insert_area_unsafe(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type)
{
KASSERT((start % PAGE_SIZE) == 0);
KASSERT((end % PAGE_SIZE) == 0);
struct _bmem_area *area = get_bmem_area();
area->start = start;
area->end = end;
struct mm_zone *zone = &mm_zones[zone_type];
struct _bmem_area *cursor;
clist_foreach_entry(&zone->_bmem_areas, cursor, link) {
if (cursor->start > area->start)
break;
}
clist_insert_before(&cursor->link, &area->link);
}
void __boot_pmalloc_init(void)
{
for (int i = 0; i < ARRAY_SIZE(_bmem_area_cache); i++) {
struct _bmem_area *area = &_bmem_area_cache[i];
debug_free_bmem_area(area);
clist_add(&bmem_area_freelist, &area->link);
}
}
void __boot_register_mem_area(vm_paddr_t start, vm_paddr_t end, enum mm_zone_type zone_type)
{
KASSERT(start < end);
start = align_ceil(start, PAGE_SIZE);
end = align_floor(end, PAGE_SIZE);
if (start == end)
return;
/* check for any overlaps with the kernel image and avoid those regions */
if (start <= image_start_phys && end >= image_end_phys) {
/*
* 0x8000 ---------------------- end (-> high_end)
* 0x7000 <free real estate>
* 0x6000 ---------------------- image_end_phys (-> high_start)
* 0x5000 <kernel code & data>
* 0x4000 ---------------------- image_start_phys (-> low_end)
* 0x3000 <free real estate>
* 0x2000 ---------------------- start (-> low_start)
*/
vm_paddr_t low_start = start;
vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
if (low_start < low_end)
insert_area_unsafe(low_start, low_end, zone_type);
vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
vm_paddr_t high_end = end;
if (high_start < high_end)
insert_area_unsafe(high_start, high_end, zone_type);
} else if (start >= image_start_phys && start <= image_end_phys) {
/*
* 0x8000 ---------------------- end (-> high_end)
* 0x7000 <free real estate>
* 0x6000 ---------------------- image_end_phys (-> high_start)
* 0x5000 <kernel code & data>
* 0x4000 ---------------------- start
* 0x3000 <not part of area>
* 0x2000 ---------------------- image_start_phys
*/
vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
vm_paddr_t high_end = end;
if (high_start < high_end)
insert_area_unsafe(high_start, high_end, zone_type);
} else if (end >= image_start_phys && end <= image_end_phys) {
/*
* 0x8000 ---------------------- image_end_phys
* 0x7000 <not part of area>
* 0x6000 ---------------------- end
* 0x5000 <kernel code & data>
* 0x4000 ---------------------- image_start_phys (-> low_end)
* 0x3000 <free real estate>
* 0x2000 ---------------------- start (-> low_start)
*/
vm_paddr_t low_start = start;
vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
if (low_start < low_end)
insert_area_unsafe(low_start, low_end, zone_type);
} else {
insert_area_unsafe(start, end, zone_type);
}
}
vm_paddr_t __boot_pmalloc(u_int log2, enum mm_zone_type zone_type)
{
/* never hand out less than a full page */
KASSERT(log2 >= PAGE_SHIFT);
/* this might fail if someone accidentally gives us a size rather than shift */
KASSERT(log2 < sizeof(vm_paddr_t) * CHAR_BIT);
const vm_size_t alloc_size = (vm_size_t)1 << log2;
struct mm_zone *zone = &mm_zones[zone_type];
struct _bmem_area *cursor;
clist_foreach_entry_rev(&zone->_bmem_areas, cursor, link) {
vm_paddr_t area_start = cursor->start;
vm_paddr_t area_end = cursor->end;
KASSERT(area_start < area_end);
/* XXX we should really use a best-fit algorithm for this */
vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
vm_paddr_t alloc_end = alloc_start + alloc_size;
if (alloc_start >= area_start && alloc_end <= area_end) {
/*
* Example with log2 == 14 (alloc_size == 0x4000):
*
* 0x9000 ------------------- area_end
* 0x8800 <high_rest>
* 0x8000 ------------------- alloc_end (aligned to 0x4000)
* : <allocated block>
* 0x4000 ------------------- alloc_start (aligned to 0x4000)
* 0x3000 <low_rest>
* 0x2000 ------------------- area_start
*/
if (alloc_start > area_start) {
struct _bmem_area *low_rest = get_bmem_area();
low_rest->start = area_start;
low_rest->end = alloc_start;
clist_insert_before(&cursor->link, &low_rest->link);
}
if (alloc_end < area_end) {
struct _bmem_area *high_rest = get_bmem_area();
high_rest->start = alloc_end;
high_rest->end = area_end;
clist_insert_after(&cursor->link, &high_rest->link);
}
clist_del(&cursor->link);
free_bmem_area(cursor);
return alloc_start;
}
}
return BOOT_PMALLOC_ERR;
}