kmalloc: add shiny new buddy page frame allocator
This is still kind of a work in progress because it will be the backend to a slab allocator, which in turn is managed by kmalloc().
parent
f0706b802b
commit
96378f019c
@ -0,0 +1,464 @@
|
||||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <arch/page.h>
|
||||
|
||||
#include <gay/arith.h>
|
||||
#include <gay/cdefs.h>
|
||||
#include <gay/clist.h>
|
||||
#include <gay/config.h>
|
||||
#include <gay/errno.h>
|
||||
#include <gay/kprintf.h>
|
||||
#include <gay/mm.h>
|
||||
#include <gay/types.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* This allocator is based on the popular design by Doug Lea:
|
||||
* <http://gee.cs.oswego.edu/dl/html/malloc.html>
|
||||
* For a more in-depth description of how the individual parts work together,
|
||||
* see also my implementation for Ardix which is very similar except that it
|
||||
* doesn't have paging:
|
||||
* <https://git.bsd.gay/fef/ardix/src/commit/c767d551d3301fc30f9fce30eda8f04e2f9a42ab/kernel/mm.c>
|
||||
* As a matter of fact, this allocator is merely an extension of the one from
|
||||
* Ardix with the only difference being that the heap can be extended upwards.
|
||||
*/
|
||||
|
||||
/**
 * Memory block header.
 * This sits at the beginning of every memory block (duh).
 */
struct memblk {
	/**
	 * @brief The usable low_size, i.e. the total block low_size minus `MEMBLK_OVERHEAD`.
	 *
	 * This low_size will also be written to the very end of the block, just after
	 * the last usable address (see `high_size` below), so neighboring blocks can
	 * be found from either side. Since blocks are always aligned to at least
	 * 4 bytes anyways, the two low-order bits are used as flags:
	 * bit 0 (`ALLOC_FLAG`) marks the block as allocated, bit 1 (`BORDER_FLAG`)
	 * marks a heap boundary (no neighbor on that side).  This makes it easy to
	 * detect two free neighboring blocks when `kfree()`ing one.
	 */
	usize low_size[1];

	union {
		/** @brief Freelist link; only valid while the block is free
		 *  (if the block is allocated, this is overwritten by user data) */
		struct clist clink;

		/** @brief Used as the return value for `kmalloc()` */
		u8 data[0];
		/**
		 * @brief Used to get the copy of the size at the end of
		 * the block, right after the last byte of `data`
		 * (index is `size / sizeof(usize)` because `data` and the
		 * trailing size copy occupy the same region)
		 */
		usize high_size[0];
	};
};
|
||||
|
||||
/* overhead per memory block in bytes (the two block sizes at the beginning and end) */
|
||||
#define OVERHEAD (2 * sizeof(usize))
|
||||
/* every allocation is padded to a multiple of this */
|
||||
#define MIN_SIZE (sizeof(struct clist))
|
||||
|
||||
/* memory blocks, sorted by increasing size */
|
||||
static CLIST(blocks);
|
||||
|
||||
/*
|
||||
* We play it *really* simple: Start at an arbitrary (page aligned, preferably
|
||||
* even page table aligned) address in virtual memory and extend the area as
|
||||
* needed as the heap grows. Efficiency doesn't matter for now; we always make
|
||||
* the heap a contiguous area without holes. There isn't even a mechanism for
|
||||
* releasing physical pages yet, i really just want to get to anything that is
|
||||
* at all usable so i can finally work on the core system architecture.
|
||||
*/
|
||||
static void *heap_start = (void *)0xd0000000;
|
||||
/*
|
||||
* Points to the first address that is not part of the heap anymore, such that
|
||||
* sizeof(heap) == heap_end - heap_start
|
||||
* Thus, the heap initially has a size of zero.
|
||||
*/
|
||||
static void *heap_end = (void *)0xd0000000;
|
||||
|
||||
/**
|
||||
* @brief Increase `heap_end` by up to `num_pages * PAGE_SIZE`.
|
||||
*
|
||||
* @param num_pages Number of pages to increase the heap by
|
||||
* @returns The actual number of pages the heap was increased by; this may be
|
||||
* less than `num_pages` if there were not enough free pages left
|
||||
*/
|
||||
static usize grow_heap(usize num_pages);
|
||||
/**
|
||||
* @brief Add a new block at the end of the heap by downloading more RAM (`grow_heap()`, actually). */
|
||||
static struct memblk *blk_create(usize num_pages);
|
||||
/** @brief Get the usable block size in bytes, without flags or overhead. */
|
||||
static usize blk_get_size(struct memblk *blk);
|
||||
/** @brief Set the usable block size without overhead and without affecting flags. */
|
||||
static void blk_set_size(struct memblk *blk, usize size);
|
||||
/** @brief Flag a block as allocated. */
|
||||
static void blk_set_alloc(struct memblk *blk);
|
||||
/** @brief Remove the allocated flag from a block. */
|
||||
static void blk_clear_alloc(struct memblk *blk);
|
||||
/** @brief Return nonzero if the block is allocated. */
|
||||
static bool blk_is_alloc(struct memblk *blk);
|
||||
/** @brief Set the border flag at the start of a block. */
|
||||
static void blk_set_border_start(struct memblk *blk);
|
||||
/** @brief Remove the border flag from the start of a block. */
|
||||
static void blk_clear_border_start(struct memblk *blk);
|
||||
/** @brief Return nonzero if a block has the border flag set at the start. */
|
||||
static bool blk_is_border_start(struct memblk *blk);
|
||||
/** @brief Set the border flag at the end of a block. */
|
||||
static void blk_set_border_end(struct memblk *blk);
|
||||
/** @brief Remove the border flag from the end of a block. */
|
||||
static void blk_clear_border_end(struct memblk *blk);
|
||||
/** @brief Return nonzero if a block has the border flag set at the end. */
|
||||
static bool blk_is_border_end(struct memblk *blk);
|
||||
/** @brief Get a block's immediate lower neighbor, or NULL if it doesn't have one. */
|
||||
static struct memblk *blk_prev(struct memblk *blk);
|
||||
/** @brief Get a block's immediate higher neighbor, or NULL if it doesn't have one. */
|
||||
static struct memblk *blk_next(struct memblk *blk);
|
||||
/** @brief Merge two contiguous free blocks into one, resort the list, and return the block. */
|
||||
static struct memblk *blk_merge(struct memblk *bottom, struct memblk *top);
|
||||
/** @brief Attempt to merge both the lower and higher neighbors of a free block. */
|
||||
static struct memblk *blk_try_merge(struct memblk *blk);
|
||||
/** @brief Cut a slice from a free block and return the slice. */
|
||||
static struct memblk *blk_slice(struct memblk *blk, usize slice_size);
|
||||
|
||||
/**
 * @brief Initialize the page allocator and seed the heap with one free block.
 *
 * @param phys_start First usable physical address
 * @param phys_end Last usable physical address
 * @returns 0 on success, or a negative error code on failure
 */
int kmalloc_init(uintptr_t phys_start, uintptr_t phys_end)
{
	int ret = mem_init(phys_start, phys_end);
	if (ret != 0)
		return ret;
	if (grow_heap(1) != 1)
		return -ENOMEM;

	/* the entire first page becomes a single free block spanning the heap */
	struct memblk *first = heap_start;
	blk_set_size(first, PAGE_SIZE - OVERHEAD);
	blk_clear_alloc(first);
	blk_set_border_start(first);
	blk_set_border_end(first);
	clist_add(&blocks, &first->clink);
	return 0;
}
|
||||
|
||||
/**
 * @brief Allocate `size` bytes of kernel memory.
 *
 * @param size Requested allocation size in bytes
 * @param flags Must be `MM_KERN` (the only supported pool for now)
 * @returns A pointer to at least `size` usable bytes, or NULL on failure
 */
void *kmalloc(usize size, enum mm_flags flags)
{
	if (flags != MM_KERN) {
		kprintf("Invalid flags passed to kmalloc()\n");
		return NULL;
	}

	if (size == 0)
		return NULL;

	/* round up to the next multiple of the minimum allocation unit */
	if (size % MIN_SIZE != 0)
		size += MIN_SIZE - size % MIN_SIZE;

	/* first fit: the list is sorted by increasing size, so the first
	 * block that is big enough is also the tightest fit */
	struct memblk *cursor;
	struct memblk *blk = NULL;
	clist_foreach_entry(&blocks, cursor, clink) {
		if (blk_get_size(cursor) >= size) {
			blk = cursor;
			break;
		}
	}

	if (blk == NULL) {
		/* ceiling division; the old `/ PAGE_SIZE + 1` form wasted an
		 * entire page whenever size + OVERHEAD was an exact multiple */
		usize required_pages = (size + OVERHEAD + PAGE_SIZE - 1) / PAGE_SIZE;
		blk = blk_create(required_pages);
		if (blk == NULL) {
			kprintf("Kernel OOM qwq\n");
			return NULL;
		}
		/* blk_slice() below expects the block to be on the freelist */
		clist_add(&blocks, &blk->clink);
	}

	blk = blk_slice(blk, size);
	blk_set_alloc(blk);
# if CFG_POISON_HEAP
	memset(blk->data, 'a', blk_get_size(blk));
# endif
	return blk->data;
}
|
||||
|
||||
void kfree(void *ptr)
|
||||
{
|
||||
# ifdef DEBUG
|
||||
if (ptr < heap_start || ptr > heap_end) {
|
||||
kprintf("Tried to free %p which is outside the heap!\n", ptr);
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
struct memblk *blk = ptr - sizeof(blk->low_size);
|
||||
# ifdef DEBUG
|
||||
if (!blk_is_alloc(blk)) {
|
||||
kprintf("Double free of %p!\n", ptr);
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
# if CFG_POISON_HEAP
|
||||
memset(blk->data, 'A', blk_get_size(blk));
|
||||
# endif
|
||||
|
||||
blk_clear_alloc(blk);
|
||||
blk_try_merge(blk);
|
||||
}
|
||||
|
||||
/*
|
||||
* These wrappers are used for linking libc against the kernel itself.
|
||||
* This is a "temporary" hack because i haven't figured out the whole C flags
|
||||
* thingy for properly producing two versions of libc (one static one for the
|
||||
* kernel and a shared one for user space).
|
||||
*/
|
||||
|
||||
/** @brief libc shim: forward allocation requests to the kernel allocator. */
__weak void *malloc(usize size)
{
	void *ptr = kmalloc(size, MM_KERN);
	return ptr;
}
|
||||
|
||||
/** @brief libc shim: forward releases to the kernel allocator. */
__weak void free(void *ptr)
{
	kfree(ptr);
	return;
}
|
||||
|
||||
/**
 * @brief Add a new block at the end of the heap by growing it.
 *
 * @param num_pages Number of pages the new block should span
 * @returns The new (free, unlinked) block, or NULL on overflow/OOM
 */
static inline struct memblk *blk_create(usize num_pages)
{
	usize blksize;
	if (mul_overflow(&blksize, num_pages, PAGE_SIZE))
		return NULL;

	/*
	 * heap_end points to the first address that is not part of the heap
	 * anymore, so that's where the new block starts when we add pages
	 */
	struct memblk *blk = heap_end;
	if (grow_heap(num_pages) != num_pages)
		return NULL; /* OOM :( */

	blk_set_size(blk, blksize - OVERHEAD);
	blk_clear_alloc(blk);
	/*
	 * The pages are fresh, so the flag bits in low_size start out as
	 * garbage which blk_set_size() deliberately preserves.  A stray
	 * border-start bit would make blk_prev() return NULL below, so it
	 * must be cleared explicitly.
	 */
	blk_clear_border_start(blk);
	blk_set_border_end(blk);

	/* the old top block loses its end-of-heap border and, if free,
	 * is merged with the new block */
	struct memblk *old_high = blk_prev(blk);
	if (old_high != NULL) {
		blk_clear_border_end(old_high);
		if (!blk_is_alloc(old_high)) {
			clist_del(&old_high->clink);
			blk = blk_merge(old_high, blk);
		}
	}

	return blk;
}
|
||||
|
||||
/**
 * @brief Increase `heap_end` by up to `num_pages * PAGE_SIZE`.
 *
 * @param num_pages Number of pages to map at the top of the heap
 * @returns The number of pages actually mapped; may be less than
 *	`num_pages` if physical pages ran out or mapping failed
 */
static inline usize grow_heap(usize num_pages)
{
	usize grown = 0;

	while (grown < num_pages) {
		uintptr_t page_phys = get_page();
		if (page_phys == 0)
			break;

		if (map_page(page_phys, heap_end, MM_PAGE_RW) != 0) {
			/* mapping failed, hand the frame back */
			put_page(page_phys);
			break;
		}

		heap_end += PAGE_SIZE;
		grown++;
	}

	vm_flush();

	return grown;
}
|
||||
|
||||
#define ALLOC_FLAG ((usize)1 << 0)
|
||||
#define BORDER_FLAG ((usize)1 << 1)
|
||||
#define SIZE_MASK ( ~(ALLOC_FLAG | BORDER_FLAG) )
|
||||
|
||||
/*
 * Coalesce a freshly freed block with its free neighbors (if any) and
 * reinsert the result into the size-sorted freelist.  The block passed in
 * must NOT currently be on the freelist.
 */
static struct memblk *blk_try_merge(struct memblk *blk)
{
	/* absorb the lower neighbor first so `blk` keeps pointing at the
	 * lowest address of the merged region */
	struct memblk *neighbor = blk_prev(blk);
	if (neighbor != NULL && !blk_is_alloc(neighbor)) {
		clist_del(&neighbor->clink);
		blk = blk_merge(neighbor, blk);
	}

	neighbor = blk_next(blk);
	if (neighbor != NULL && !blk_is_alloc(neighbor)) {
		clist_del(&neighbor->clink);
		blk = blk_merge(blk, neighbor);
	}

	/* walk forward to the first block at least as big as ours and insert
	 * before it, keeping the list sorted by increasing size */
	struct memblk *cursor;
	clist_foreach_entry(&blocks, cursor, clink) {
		if (blk_get_size(cursor) >= blk_get_size(blk))
			break;
	}
	/* NOTE(review): if no larger block exists, this relies on
	 * clist_foreach_entry leaving `cursor` such that &cursor->clink is
	 * the list head (so the block is appended at the tail) — confirm
	 * against the clist macro definition */
	clist_add(&cursor->clink, &blk->clink);

	return blk;
}
|
||||
|
||||
static struct memblk *blk_merge(struct memblk *bottom, struct memblk *top)
|
||||
{
|
||||
usize bottom_size = blk_get_size(bottom);
|
||||
usize top_size = blk_get_size(top);
|
||||
usize total_size = bottom_size + top_size + OVERHEAD;
|
||||
|
||||
blk_set_size(bottom, total_size);
|
||||
|
||||
return bottom;
|
||||
}
|
||||
|
||||
/**
 * @brief Cut a slice of `slice_size` bytes from a free block.
 * Removes `blk` from the freelist, returns the (allocated) slice, and
 * reinserts the remainder — if big enough — into the sorted freelist.
 *
 * @param blk Free block to slice (must currently be on the freelist)
 * @param slice_size Usable size of the requested slice in bytes
 * @returns The allocated slice (may be the whole block)
 */
static struct memblk *blk_slice(struct memblk *blk, usize slice_size)
{
	/* remember where we were in the list so the remainder can be
	 * reinserted with a backwards walk from here */
	struct memblk *cursor = clist_prev_entry(blk, clink);
	clist_del(&blk->clink);

	/*
	 * If the remaining size is less than the minimum allocation unit, we
	 * hand out the entire block.  The underflow check catches the case
	 * where slice_size is less than OVERHEAD smaller than the full block
	 * size.  (The previous version also computed rest_size by hand first;
	 * that store was dead because sub_underflow overwrites it.)
	 */
	usize rest_size;
	bool carry = sub_underflow(&rest_size, blk_get_size(blk), slice_size + OVERHEAD);
	if (carry || rest_size < MIN_SIZE) {
		blk_set_alloc(blk);
		return blk;
	}

	/* the remainder starts one size-word past the end of the slice data */
	usize slice_words = slice_size / sizeof(blk->low_size);
	struct memblk *rest = (void *)&blk->high_size[slice_words + 1];
	blk_set_size(rest, rest_size);
	blk_clear_alloc(rest);
	blk_clear_border_start(rest);

	blk_set_size(blk, slice_size);
	blk_set_alloc(blk);
	blk_clear_border_end(blk);

	/* walk backwards from the slicing point to find where the (smaller)
	 * remainder belongs in the size-sorted list */
	clist_foreach_entry_rev_continue(&blocks, cursor, clink) {
		if (blk_get_size(cursor) <= rest_size)
			break;
	}
	clist_add_first(&cursor->clink, &rest->clink);

	return blk;
}
|
||||
|
||||
/*
 * Get a block's immediate lower neighbor by reading the neighbor's trailing
 * size copy, which sits in the word directly below our own header.
 * Returns NULL if this block is the lowest one on the heap.
 */
static inline struct memblk *blk_prev(struct memblk *blk)
{
	if (blk_is_border_start(blk))
		return NULL;

	/* low_size[-1] is the previous block's high_size copy; deliberately
	 * out of bounds as far as the compiler is concerned */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Warray-bounds" /* trust me bro, this is fine */
	return (void *)blk - (blk->low_size[-1] & SIZE_MASK) - OVERHEAD;
#pragma clang diagnostic pop
}
|
||||
|
||||
/*
 * Get a block's immediate higher neighbor, or NULL if this block is the
 * highest one on the heap.
 */
static inline struct memblk *blk_next(struct memblk *blk)
{
	if (blk_is_border_end(blk))
		return NULL;

	/* dividing the raw size word discards the flag bits because both
	 * flags fit below sizeof(usize) and sizes are multiples of MIN_SIZE;
	 * high_size[index] is our footer, [index + 1] is the neighbor's header */
	usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
	return (void *)&blk->high_size[index + 1];
}
|
||||
|
||||
/*
 * Get the usable block size in bytes, with the flag bits masked out.
 * In DEBUG builds, also verify that the footer size copy agrees with the
 * header (cheap heap-corruption canary).
 */
static inline usize blk_get_size(struct memblk *blk)
{
# ifdef DEBUG
	usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
	if ((blk->low_size[0] & SIZE_MASK) != (blk->high_size[index] & SIZE_MASK))
		kprintf("Memory corruption in block %p detected!\n", blk);
# endif
	return blk->low_size[0] & SIZE_MASK;
}
|
||||
|
||||
/*
 * Set the usable block size (without overhead) in both the header and the
 * footer copy, preserving whatever flag bits are currently stored in the
 * header.  Note that the footer written here is located according to the
 * NEW size; the caller is responsible for any stale footer at the old
 * position (e.g. blk_merge relies on exactly that).
 */
static void blk_set_size(struct memblk *blk, usize size)
{
	/* don't affect flags: ~SIZE_MASK is exactly the two flag bits */
	blk->low_size[0] &= ~SIZE_MASK;
# ifdef DEBUG
	/* sizes must be aligned so they never collide with the flag bits */
	if (size & ~SIZE_MASK)
		kprintf("Unaligned size in blk_set_size()\n");
# endif
	blk->low_size[0] |= size & SIZE_MASK;

	usize index = size / sizeof(blk->low_size[0]);
	blk->high_size[index] &= ~SIZE_MASK;
	blk->high_size[index] |= size & SIZE_MASK;
}
|
||||
|
||||
static inline void blk_set_alloc(struct memblk *blk)
|
||||
{
|
||||
usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
|
||||
|
||||
blk->low_size[0] |= ALLOC_FLAG;
|
||||
blk->high_size[index] |= ALLOC_FLAG;
|
||||
}
|
||||
|
||||
static inline void blk_clear_alloc(struct memblk *blk)
|
||||
{
|
||||
usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
|
||||
|
||||
blk->low_size[0] &= ~ALLOC_FLAG;
|
||||
blk->high_size[index] &= ~ALLOC_FLAG;
|
||||
}
|
||||
|
||||
static inline bool blk_is_alloc(struct memblk *blk)
|
||||
{
|
||||
return (blk->low_size[0] & ALLOC_FLAG) != 0;
|
||||
}
|
||||
|
||||
static inline void blk_set_border_start(struct memblk *blk)
|
||||
{
|
||||
blk->low_size[0] |= BORDER_FLAG;
|
||||
}
|
||||
|
||||
static inline void blk_clear_border_start(struct memblk *blk)
|
||||
{
|
||||
blk->low_size[0] &= ~BORDER_FLAG;
|
||||
}
|
||||
|
||||
static inline bool blk_is_border_start(struct memblk *blk)
|
||||
{
|
||||
return (blk->low_size[0] & BORDER_FLAG) != 0;
|
||||
}
|
||||
|
||||
static inline void blk_set_border_end(struct memblk *blk)
|
||||
{
|
||||
usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
|
||||
blk->high_size[index] |= BORDER_FLAG;
|
||||
}
|
||||
|
||||
static inline void blk_clear_border_end(struct memblk *blk)
|
||||
{
|
||||
usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
|
||||
blk->high_size[index] &= ~BORDER_FLAG;
|
||||
}
|
||||
|
||||
static inline bool blk_is_border_end(struct memblk *blk)
|
||||
{
|
||||
usize index = blk->low_size[0] / sizeof(blk->low_size[0]);
|
||||
return (blk->high_size[index] & BORDER_FLAG) != 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>.
|
||||
*
|
||||
* GayBSD is nonviolent software: you may only use, redistribute, and/or
|
||||
* modify it under the terms of the Cooperative Nonviolent Public License
|
||||
* (CNPL) as found in the LICENSE file in the source code root directory
|
||||
* or at <https://git.pixie.town/thufie/npl-builder>; either version 7
|
||||
* of the license, or (at your option) any later version.
|
||||
*
|
||||
* GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
|
||||
* permitted by applicable law. See the CNPL for details.
|
||||
*/
|
@ -0,0 +1,403 @@
|
||||
/* See the end of this file for copyright and license terms. */
|
||||
|
||||
#include <arch/page.h>
|
||||
|
||||
#include <gay/clist.h>
|
||||
#include <gay/config.h>
|
||||
#include <gay/kprintf.h>
|
||||
#include <gay/mm.h>
|
||||
#include <gay/types.h>
|
||||
#include <gay/util.h>
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef __HAVE_HUGEPAGES
|
||||
#error "Systems without huge pages are currently unsupported because i'm a dumb bitch"
|
||||
#endif
|
||||
|
||||
#if DMAP_OFFSET % HUGEPAGE_SIZE != 0
|
||||
#error "DMAP_OFFSET must be an integral multiple of HUGEPAGE_SIZE"
|
||||
#endif
|
||||
|
||||
/* this should be impossible because arch/page.h must also define PAGE_SHIFT
|
||||
* and HUGEPAGE_SHIFT, meaning the two are definitively powers of 2 */
|
||||
#if HUGEPAGE_SIZE % PAGE_SIZE != 0
|
||||
#error "HUGEPAGE_SIZE must be an integral multiple of PAGE_SIZE"
|
||||
#endif
|
||||
|
||||
#if PAGE_SIZE % LONG_BIT != 0
|
||||
#error "PAGE_SIZE must be an integral multiple of LONG_BIT"
|
||||
#endif
|
||||
|
||||
#if CFG_DEBUG_PAGE_ALLOCS
|
||||
#define page_debug(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
|
||||
#else
|
||||
#define page_debug(msg, ...)
|
||||
#endif
|
||||
|
||||
/**
 * We have cache levels for areas ranging from a single page up to a huge page
 * on a logarithmic scale.  Every level covers double the pages per entry than
 * the one below it, starting at one page per entry.  The effective result is
 * that a single entry in the cache on level L covers `(1 << L)` pages.
 */
#define CACHE_LEVELS (HUGEPAGE_SHIFT - PAGE_SHIFT + 1)

/** @brief One buddy level: a freelist plus an allocation bitmap. */
struct cache_pool {
	/** @brief List of free entries; each links directly into the free memory itself */
	struct clist freelist;
	/** @brief One bit per entry on this level; set = allocated (or split) */
	unsigned long *bitmap;
	/** @brief Number of entries currently on `freelist` */
	usize free_entries;
	/** @brief Length of `bitmap` in longwords (not bits, not bytes) */
	usize bitmap_len;
};
static struct cache_pool caches[CACHE_LEVELS];
|
||||
|
||||
#define LONG_MASK ( ~(usize)(sizeof(long) - 1) )
|
||||
|
||||
uintptr_t phys_start;
|
||||
uintptr_t phys_end;
|
||||
|
||||
/**
|
||||
* @brief Split a block and return the lower half.
|
||||
* The block is assumed to already have been removed from its freelist.
|
||||
* The high half (i.e. the block that is *not* returned) is inserted into the
|
||||
* freelist one level below `level`.
|
||||
*/
|
||||
static void *split_buddy(void *ptr, int level);
|
||||
/**
|
||||
* @brief Attempt to coalesce a block with its buddy.
|
||||
* If coalition is possible, the buddy is removed from its freelist at
|
||||
* `level` and the union block is inserted at `level + 1`.
|
||||
*
|
||||
* @param ptr Pointer to the block
|
||||
* @param level Cache level, must be less than `CACHE_LEVELS - 1` (because you
|
||||
* can't join blocks at the highest cache level)
|
||||
* @return The joined block, or `nil` if coalition was not possible
|
||||
*/
|
||||
static void *try_join_buddy(void *ptr, int level);
|
||||
|
||||
static usize get_bit_number(void *ptr, int level);
|
||||
|
||||
static void set_bits(unsigned long *bitfield, usize first, usize count);
|
||||
static void clr_bits(unsigned long *bitfield, usize first, usize count);
|
||||
|
||||
static bool get_bit(const unsigned long *bitfield, usize bit_number);
|
||||
static void set_bit(unsigned long *bitfield, usize bit_number);
|
||||
|
||||
static int sanity_check(void)
|
||||
{
|
||||
if (phys_end != HUGEPAGE_ALIGN(phys_end) || phys_start != HUGEPAGE_ALIGN(phys_start)) {
|
||||
kprintf("Unaligned memory, this should never be possible\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((phys_end - phys_start) < (32 * 1024 * 1024)) {
|
||||
kprintf("Less than 32 MB of usable RAM, this wouldn't go well\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (phys_start > phys_end) {
|
||||
kprintf("Hey, this is funny. pages_init() was called with parameters "
|
||||
"such that phys_start > phys_end (%p > %p), which "
|
||||
"should absolutely never be possible. I can't really continue "
|
||||
"like this, so have a nice day.\n", (void *)phys_start, (void *)phys_end);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void init_freelist(void)
|
||||
{
|
||||
for (int i = 0; i < CACHE_LEVELS; i++) {
|
||||
clist_init(&caches[i].freelist);
|
||||
caches[i].free_entries = 0;
|
||||
}
|
||||
|
||||
struct cache_pool *pool = &caches[CACHE_LEVELS - 1];
|
||||
const usize step = 1 << (CACHE_LEVELS + PAGE_SHIFT);
|
||||
for (void *pos = kheap_start; pos < kheap_end; pos += step) {
|
||||
struct clist *entry = pos;
|
||||
clist_add(&pool->freelist, entry);
|
||||
pool->free_entries += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Bring up the buddy page frame allocator: map all physical memory into the
 * direct map, carve the per-level bitmaps out of the top of physical memory,
 * and seed the freelists.  Expects the globals phys_start/phys_end to be set.
 * Returns 0 on success, 1 on failure.
 */
int pages_init(void)
{
	usize phys_size = phys_end - phys_start;

	if (sanity_check() != 0)
		return 1;

	/*
	 * map entire physical memory into the direct contiguous area
	 */
	for (uintptr_t physptr = phys_start; physptr < phys_end; physptr += HUGEPAGE_SIZE) {
		const enum mm_page_flags pflags = MM_PAGE_HUGE | MM_PAGE_RW | MM_PAGE_GLOBAL;
		map_page(physptr, (void *)(physptr + DMAP_OFFSET), pflags);
	}
	vm_flush();

	/*
	 * calculate the size of each bitmap, as well as their combined size
	 * (level i needs one bit per (1 << i) pages)
	 */
	usize cache_bytes = 0;
	for (int i = 0; i < CACHE_LEVELS; i++) {
		usize bits = phys_size >> (PAGE_SHIFT + i);
		/* round up to the next full long */
		/* NOTE(review): LONG_MASK masks to a multiple of sizeof(long)
		 * (bytes), not LONG_BIT (bits) — this over-rounds and wastes a
		 * few bits per level; harmless but verify the intent */
		if (bits % LONG_BIT) {
			bits &= LONG_MASK;
			bits += LONG_BIT;
		}
		cache_bytes += bits / 8;
		caches[i].bitmap_len = bits / LONG_BIT;
	}
	/* smol buffer in case we overshoot for whatever reason */
	cache_bytes += sizeof(long);

	page_debug("Page frame overhead = %zu bytes\n", cache_bytes);

	/*
	 * zero out all bitmaps (they live at the very top of physical
	 * memory, accessed through the direct map)
	 */
	uintptr_t cache_start_phys = phys_end - cache_bytes;
	unsigned long *cache_start = __v(cache_start_phys);
	memset(cache_start, 0, cache_bytes);

	/*
	 * populate the caches array and preallocate pages that can't be handed
	 * out (i.e. the cache bitmaps)
	 */
	unsigned long *cache_pos = cache_start;
	for (int i = 0; i < CACHE_LEVELS; i++) {
		usize total_bits = caches[i].bitmap_len * LONG_BIT;
		/* NOTE(review): presumably this marks the tail entries that
		 * overlap the bitmap region as allocated; confirm that
		 * (cache_bytes >> (PAGE_SHIFT + i)) is the intended count and
		 * not an off-by-one for partially covered entries */
		usize wasted_bits = total_bits - (cache_bytes >> (PAGE_SHIFT + i));
		if (wasted_bits == 0)
			wasted_bits = 1;
		set_bits(cache_pos, total_bits - wasted_bits, wasted_bits);

		caches[i].bitmap = cache_pos;
		cache_pos += caches[i].bitmap_len;
	}

	/* kheap_start and kheap_end are globals */
	kheap_start = __v(phys_start);
	kheap_end = cache_start;

	init_freelist();

	return 0;
}
|
||||
|
||||
/*
 * Map a page count to the smallest cache level whose entries can hold it.
 * Returns CACHE_LEVELS (an invalid level the callers check for) when the
 * request is bigger than the largest entry.
 */
static int get_level(usize count)
{
	for (int level = 0; level < CACHE_LEVELS; level++) {
		if (((usize)1 << level) >= count)
			return level;
	}
	return CACHE_LEVELS;
}
|
||||
|
||||
/*
 * Allocate `count` contiguous pages from the buddy caches.
 * Finds the lowest level that can satisfy the request, takes the first free
 * entry at or above it, splits it down as needed, and marks the bitmaps.
 * Returns nil when the request is too large or memory is exhausted.
 * NOTE(review): `flags` is currently unused — verify whether that is
 * intentional at this stage.
 */
void *get_pages(usize count, enum mm_flags flags)
{
	int level = get_level(count);
	if (level == CACHE_LEVELS)
		return nil;

	/* search upwards for the first level with a free entry */
	struct clist *entry;
	int entry_level;
	for (entry_level = level; entry_level < CACHE_LEVELS; entry_level++) {
		if (caches[entry_level].free_entries > 0) {
			entry = caches[entry_level].freelist.next;
			break;
		}
	}
	if (entry_level == CACHE_LEVELS)
		return nil;

	clist_del(entry);
	caches[entry_level].free_entries--;

	/* split the entry down until it is the requested size */
	usize bit_number = get_bit_number(entry, level);
	/* NOTE(review): bit_number is computed at `level` but used below to
	 * index caches[entry_level] (a higher level, where bit numbers are
	 * smaller by a factor of 1 << (entry_level - level)), and it is then
	 * shifted further each iteration — the scaling looks inconsistent;
	 * confirm against the bitmap layout before relying on the bitmaps */
	while (entry_level > level) {
		entry = split_buddy(entry, entry_level);
		set_bit(caches[entry_level].bitmap, bit_number);
		entry_level--;
		bit_number <<= 1;
	}

	/* mark the allocation on this level and every level below it */
	do {
		usize bit_count = 1 << (level - entry_level);
		set_bits(caches[entry_level].bitmap, bit_number, bit_count);
	} while (entry_level-- != 0);

	return (void *)entry;
}
|
||||
|
||||
/*
 * Return `count` contiguous pages to the buddy caches.
 * `count` must match the value passed to get_pages() for this pointer
 * (it is mapped to the same level).  Clears the bitmap bits on this level
 * and every level below, then coalesces upwards as far as possible.
 * NOTE(review): if the very first try_join_buddy() fails (buddy allocated),
 * the freed block is never inserted into any freelist — this looks like it
 * leaks the block; confirm against try_join_buddy()'s contract.
 */
void free_pages(void *ptr, usize count)
{
	int level = get_level(count);
	if (level == CACHE_LEVELS)
		return;

	/* one bit on this level covers two on the level below, hence the
	 * doubling of both the start bit and the count on the way down */
	usize bit_number = get_bit_number(ptr, level);
	usize bit_count = 1;
	for (int i = level; i >= 0; i--) {
		clr_bits(caches[i].bitmap, bit_number, bit_count);
		bit_number <<= 1;
		bit_count <<= 1;
	}

	/* coalesce with free buddies until we hit an allocated one or the top */
	while (ptr != nil && level < CACHE_LEVELS - 1) {
		ptr = try_join_buddy(ptr, level);
		level++;
	}
}
|
||||
|
||||
static inline usize get_bit_number(void *ptr, int level)
|
||||
{
|
||||
return ((uintptr_t)ptr - (uintptr_t)kheap_start) >> (PAGE_SHIFT + level);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set a range of bits in a bitfield.
|
||||
*
|
||||
* @param bitfield Pointer to the beginning of the bitfield
|
||||
* @param first Number of the first bit to set, counting from 0
|
||||
* @param count Amount of bits to set
|
||||
*/
|
||||
static void set_bits(unsigned long *bitfield, usize first, usize count)
|
||||
{
|
||||
bitfield += first / LONG_BIT;
|
||||
unsigned int bit = first % LONG_BIT;
|
||||
|
||||
if (bit != 0) {
|
||||
unsigned long mask = (1lu << bit) - 1;
|
||||
*bitfield++ |= ~mask;
|
||||
count -= bit;
|
||||
}
|
||||
|
||||
while (count >= LONG_BIT) {
|
||||
*bitfield++ = ULONG_MAX;
|
||||
count -= LONG_BIT;
|
||||
}
|
||||
|
||||
if (count != 0) {
|
||||
unsigned long mask = (1lu << count) - 1;
|
||||
*bitfield |= mask;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Clear a range of bits in a bitfield.
|
||||
*
|
||||
* The algorithm is similar to `set_bits()`, it just does the inverse.
|
||||
*
|
||||
* @param bitfield Pointer to the beginning of the bitfield
|
||||
* @param first Number of the first bit to clear, counting from 0
|
||||
* @param count Amount of bits to clear
|
||||
*/
|
||||
static void clr_bits(unsigned long *bitfield, usize first, usize count)
|
||||
{
|
||||
bitfield += first / LONG_BIT;
|
||||
unsigned int bit = first % LONG_BIT;
|
||||
|
||||
if (bit != 0) {
|
||||
unsigned long mask = (1lu << bit) - 1;
|
||||
*bitfield++ &= mask;
|
||||
count -= bit;
|
||||
}
|
||||
|
||||
while (count >= LONG_BIT) {
|
||||
*bitfield++ = 0;
|
||||
count -= LONG_BIT;
|
||||
}
|
||||
|
||||
if (count != 0) {
|
||||
unsigned long mask = (1lu << bit) - 1;
|
||||
*bitfield &= ~mask;
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Return true if bit `bit_number` is set in the bitfield. */
static inline bool get_bit(const unsigned long *bitfield, usize bit_number)
{
	return bitfield[bit_number / LONG_BIT] & (1lu << (bit_number % LONG_BIT));
}
|
||||
|
||||
/** @brief Set bit `bit_number` in the bitfield. */
static inline void set_bit(unsigned long *bitfield, usize bit_number)
{
	bitfield[bit_number / LONG_BIT] |= 1lu << (bit_number % LONG_BIT);
}
|
||||
|
||||
/*
 * Split a block and return the lower half.  The block is assumed to already
 * have been removed from its freelist; the upper half is inserted into the
 * freelist one level below `level`.  The caller is responsible for updating
 * the bitmaps.
 */
static inline void *split_buddy(void *ptr, int level)
{
# if CFG_DEBUG_PAGE_ALLOCS
	/* a level-L entry must be aligned to its own size */
	if ((uintptr_t)ptr % (1 << (PAGE_SHIFT + level))) {
		kprintf("split_buddy(%p, %d): unaligned ptr!\n", ptr, level);
		return nil;
	}
	/* level 0 entries are single pages and cannot be split further */
	if (level < 1 || level >= CACHE_LEVELS) {
		kprintf("split_buddy(%p, %d): invalid level!\n", ptr, level);
		return nil;
	}
# endif

	/* the upper half starts exactly one (level - 1)-sized entry above ptr */
	struct clist *high_buddy = ptr + (1 << (PAGE_SHIFT + level - 1));
	clist_add(&caches[level - 1].freelist, high_buddy);
	caches[level - 1].free_entries++;

	return ptr;
}
|
||||
|
||||
/*
 * Attempt to coalesce a block with its buddy.  If the buddy is free, it is
 * removed from its freelist at `level` and the joined block is inserted at
 * `level + 1`; returns the joined block, or nil if the buddy is allocated.
 * `ptr` itself must not be on any freelist when this is called.
 */
static void *try_join_buddy(void *ptr, int level)
{
	const usize entry_size = 1 << (PAGE_SHIFT + level);

# if CFG_DEBUG_PAGE_ALLOCS
	if ((uintptr_t)ptr % entry_size) {
		kprintf("try_join_buddy(%p, %d): unaligned ptr!\n", ptr, level);
		return nil;
	}
	/* level must be < CACHE_LEVELS - 1 because you
	 * can't join blocks on the topmost level */
	if (level >= CACHE_LEVELS - 1) {
		kprintf("try_join_buddy(%p, %d): level >= CACHE_LEVELS - 1!\n", ptr, level);
		return nil;
	}
# endif

	/* test if the buddy block is allocated and return nil if it is;
	 * XORing with entry_size flips exactly the bit that distinguishes
	 * a buddy pair on this level */
	uintptr_t buddy = (uintptr_t)ptr ^ entry_size;
	usize buddy_bitnum = get_bit_number((void *)buddy, level);
	if (get_bit(caches[level].bitmap, buddy_bitnum))
		return nil;

	/* if it is not, remove it from the freelist */
	clist_del((struct clist *)buddy);
	caches[level].free_entries--;

	/* add the coalesced block to the freelist one level above;
	 * clearing the entry_size bit yields the lower block of the pair,
	 * which is the start of the joined (level + 1)-sized entry */
	struct clist *even = (struct clist *)((uintptr_t)ptr & ~entry_size);
	clist_add(&caches[level + 1].freelist, even);
	caches[level + 1].free_entries++;
	return even;
}
|
||||
|
||||
/*
|
||||
* This file is part of GayBSD.
|
||||
* Copyright (c) 2021 fef <owo@fef.moe>.
|
||||
*
|
||||
* GayBSD is nonviolent software: you may only use, redistribute, and/or
|
||||
* modify it under the terms of the Cooperative Nonviolent Public License
|
||||
* (CNPL) as found in the LICENSE file in the source code root directory
|
||||
* or at <https://git.pixie.town/thufie/npl-builder>; either version 7
|
||||
* of the license, or (at your option) any later version.
|
||||
*
|
||||
* GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
|
||||
* permitted by applicable law. See the CNPL for details.
|
||||
*/
|
Loading…
Reference in New Issue