diff --git a/arch/x86/include/i386/page.h b/arch/x86/include/i386/page.h
index aabb8af..167c316 100644
--- a/arch/x86/include/i386/page.h
+++ b/arch/x86/include/i386/page.h
@@ -13,18 +13,26 @@
 #include
 #include
 
+/**
+ * @brief A single 32-bit Page Table Entry.
+ * The layout matches that of the Intel SDM, vol 3, sect 4.3, fig 4-4.
+ * Bits 9 and 10 (`slab` and `atomic`) are marked as AVL in the manual and
+ * ignored by the MMU. We only use them for `get_pflags()`/`set_pflags()`.
+ */
 struct x86_page_table_entry {
-	unsigned present:1;		/**< Page Fault on access if 0 */
-	unsigned rw:1;			/**< Page Fault on write if 0 */
-	unsigned user:1;		/**< Page Fault on user mode access if 0 */
-	unsigned write_through:1;	/**< Enable write-through caching */
-	unsigned cache_disabled:1;	/**< Disable caching in TLB */
-	unsigned accessed:1;		/**< 1 if page has been accessed */
-	unsigned dirty:1;		/**< 1 if page has been written to */
-	unsigned _reserved0:1;
-	unsigned global:1;		/**< Don't update the TLB on table swap if 1 */
-	unsigned _reserved1:3;
-	uintptr_t shifted_address:20;	/**< Aligned pointer to the physical page */
+/* 0 */	bool present:1;			/**< Page Fault on access if 0 */
+/* 1 */	bool rw:1;			/**< Page Fault on write if 0 */
+/* 2 */	bool user:1;			/**< Page Fault on user mode access if 0 */
+/* 3 */	bool write_through:1;		/**< Enable write-through caching */
+/* 4 */	bool cache_disabled:1;		/**< Disable caching in TLB */
+/* 5 */	bool accessed:1;		/**< 1 if page has been accessed */
+/* 6 */	bool dirty:1;			/**< 1 if page has been written to */
+/* 7 */	unsigned _reserved0:1;
+/* 8 */	bool global:1;			/**< Don't update the TLB on table swap if 1 */
+/* 9 */	bool slab:1;			/**< Used by the slab allocator */
+/* 10 */bool atomic:1;			/**< Allocated atomically */
+/* 11 */unsigned _unused:1;
+/* 12 */uintptr_t shifted_address:20;	/**< Aligned pointer to the physical page */
 } __packed;
 
 #define __PFLAG_PRESENT		(1 << 0)
 #define __PFLAG_RW		(1 << 1)
@@ -34,6 +42,8 @@ struct x86_page_table_entry {
 #define __PFLAG_ACCESSED	(1 << 5)
 #define __PFLAG_DIRTY		(1 << 6)
 #define __PFLAG_GLOBAL		(1 << 8)
+#define __PFLAG_SLAB		(1 << 9)
+#define __PFLAG_ATOMIC		(1 << 10)
 
 struct x86_page_table {
 	struct x86_page_table_entry entries[1024];
@@ -50,19 +60,22 @@ struct x86_page_table {
  * @param index Table index in the page directory
  */
 #define X86_CURRENT_PT(index) ( &((struct x86_page_table *)X86_PD_OFFSET)[index] )
+#define X86_CURRENT_PTE(pd_index, pt_index) (&(X86_CURRENT_PT(pd_index)->entries[pt_index]))
 
 struct x86_page_directory_entry {
-	unsigned present:1;		/**< Page Fault on access if 0 */
-	unsigned rw:1;			/**< Page Fault on write if 0 */
-	unsigned user:1;		/**< Page Fault on user mode access if 0 */
-	unsigned write_through:1;	/**< Enable write-through caching */
-	unsigned cache_disabled:1;	/**< Disable caching in TLB */
-	unsigned accessed:1;		/**< 1 if page has been accessed */
-	unsigned _reserved0:1;
-	unsigned huge:1;		/**< 0 = 4K, 1 = 4M */
-	unsigned _reserved1:1;
-	unsigned _ignored2:3;
-	uintptr_t shifted_address:20;	/**< Aligned pointer to `struct x86_page_table` */
+/* 0 */	bool present:1;			/**< Page Fault on access if 0 */
+/* 1 */	bool rw:1;			/**< Page Fault on write if 0 */
+/* 2 */	bool user:1;			/**< Page Fault on user mode access if 0 */
+/* 3 */	bool write_through:1;		/**< Enable write-through caching */
+/* 4 */	bool cache_disabled:1;		/**< Disable caching in TLB */
+/* 5 */	bool accessed:1;		/**< 1 if page has been accessed */
+/* 6 */	unsigned _reserved0:1;
+/* 7 */	bool huge:1;			/**< 0 = 4K, 1 = 4M */
+/* 8 */	unsigned _reserved1:1;
+/* 9 */	bool slab:1;			/**< Used by the slab allocator (only if `!huge`) */
+/* 10 */bool atomic:1;			/**< Allocated atomically (only if `!huge`) */
+/* 11 */unsigned _unused:1;
+/* 12 */uintptr_t shifted_address:20;	/**< Aligned pointer to `struct x86_page_table` */
 } __packed;
 
 #define __PFLAG_HUGE	(1 << 7)
@@ -77,6 +90,7 @@ struct x86_page_directory {
  * `arch/x86/mm/page.c` for a more detailed explanation.
  */
 #define X86_CURRENT_PD ((struct x86_page_directory *)X86_CURRENT_PT(1023))
+#define X86_CURRENT_PDE(index) (&X86_CURRENT_PD->entries[index])
 
 /**
  * @brief Arch dependent virtual memory information data structure (x86 version).
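Since the bitfield above mirrors the __PFLAG_* positions bit for bit, converting an entry to a flag mask needs no shifting at all. A minimal sketch of the idea (this helper is hypothetical and not part of the patch; it assumes the packed entry really is a single 32-bit word, and the real get_pflags()/set_pflags() may look different):

/* hypothetical helper, for illustration only */
static inline enum pflags x86_pte_flags(const struct x86_page_table_entry *pte)
{
	unsigned long raw = *(const unsigned long *)pte;
	/* bits 0-11 line up with __PFLAG_*, bits 12-31 are shifted_address */
	return (enum pflags)(raw & 0xfff);
}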
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 43528ce..cf088b4 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -17,8 +17,8 @@
 #include
 #include
 #include
+#include
 #include
-#include
 #include
 
@@ -48,7 +48,7 @@ int map_page(uintptr_t phys, void *virt, enum pflags flags)
 	usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;
 	struct x86_page_directory_entry *pde = &X86_CURRENT_PD->entries[pd_index];
 
-	if (flags & PFLAG_HUGE) {
+	if (flags & P_HUGE) {
 # ifdef DEBUG
 		if (phys != HUGEPAGE_ALIGN(phys)) {
 			kprintf("map_page(): unaligned physical address %p!\n",
@@ -61,13 +61,18 @@ int map_page(uintptr_t phys, void *virt, enum pflags flags)
 		}
 # endif
 
+		if (pde->present && !pde->huge) {
+			void *pt = __v(pde->shifted_address << PAGE_SHIFT);
+			free_pages(pt);
+		}
+
 		*(unsigned long *)pde = 0;
 		pde->present = 1;
 		pde->huge = 1;
-		pde->rw = (flags & PFLAG_RW) != 0;
-		pde->user = (flags & PFLAG_USER) != 0;
-		pde->accessed = (flags & PFLAG_ACCESSED) != 0;
-		pde->cache_disabled = (flags & PFLAG_NOCACHE) != 0;
+		pde->rw = (flags & P_RW) != 0;
+		pde->user = (flags & P_USER) != 0;
+		pde->accessed = (flags & P_ACCESSED) != 0;
+		pde->cache_disabled = (flags & P_NOCACHE) != 0;
 		pde->shifted_address = phys >> PAGE_SHIFT;
 		return 0;
 	}
@@ -80,7 +85,7 @@ int map_page(uintptr_t phys, void *virt, enum pflags flags)
 	struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
 
 	if (!pde->present) {
-		uintptr_t pt_phys = vtophys(get_pages(1, MM_ATOMIC));
+		uintptr_t pt_phys = vtophys(get_pages(1, M_ATOMIC));
 		if (!pt_phys)
 			return -ENOMEM;
 
@@ -94,9 +99,9 @@ int map_page(uintptr_t phys, void *virt, enum pflags flags)
 
 	struct x86_page_table_entry *pte = &pt->entries[pt_index];
 	*(unsigned long *)pte = 0; /* zero out the entire entry first */
-	pte->rw = (flags & PFLAG_RW) != 0;
-	pte->user = (flags & PFLAG_USER) != 0;
-	pte->cache_disabled = (flags & PFLAG_NOCACHE) != 0;
+	pte->rw = (flags & P_RW) != 0;
+	pte->user = (flags & P_USER) != 0;
+	pte->cache_disabled = (flags & P_NOCACHE) != 0;
 	pte->shifted_address = phys >> PAGE_SHIFT;
 	pte->present = 1;
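For reference, this is roughly how the renamed flags are meant to be used by callers of map_page(); the physical and virtual addresses here are made up purely for the example:

/* illustration only, the addresses are made up */
int err = map_page(0x00400000, (void *)0xd0400000, P_RW | P_HUGE);	/* one 4 M mapping */
if (err == 0)
	err = map_page(0x00001000, (void *)0xd0001000, P_RW);		/* one 4 K mapping */
vm_flush();	/* the TLB keeps serving stale entries until it is flushed */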
@@ -107,20 +112,20 @@ int map_page(uintptr_t phys, void *virt, enum pflags flags)
  * The only difference between this and map_page() is that we can't allocate
  * new pages using get_pages() but have to use __early_get_page() instead here.
  * So, all we need to do is ensure that map_page() doesn't need to allocate new
- * pages when we call it, which it only does if pflags does not have PFLAG_HUGE
+ * pages when we call it, which it only does if pflags does not have P_HUGE
  * set and the page table doesn't exist (present bit in the page directory is
- * clear). Therefore, we just need to make sure that, if PFLAG_HUGE is *not*
+ * clear). Therefore, we just need to make sure that, if P_HUGE is *not*
  * set, the page table is already allocated and marked as present in the page
  * directory.
  */
 void __early_map_page(uintptr_t phys, void *virt, enum pflags pflags)
 {
-	if (!(pflags & PFLAG_HUGE)) {
+	if (!(pflags & P_HUGE)) {
 		usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;
 		struct x86_page_directory_entry *pde = &X86_CURRENT_PD->entries[pd_index];
 		if (!pde->present) {
 			uintptr_t pt_phys = __early_get_page();
-			*(unsigned long *)pde = PFLAG_PRESENT | PFLAG_RW;
+			*(unsigned long *)pde = P_PRESENT | P_RW;
 			pde->shifted_address = pt_phys >> PAGE_SHIFT;
 		}
 	}
@@ -146,23 +151,21 @@ uintptr_t unmap_page(void *virt)
 	uintptr_t phys = 0;
 
 	if (pde->huge) {
-		phys = pde->shifted_address;
-		phys <<= HUGEPAGE_SHIFT;
-		*(unsigned long *)pde = 0;
+		phys = pde->shifted_address << PAGE_SHIFT;
+		pde->present = 0;
 	} else {
 		struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
 		struct x86_page_table_entry *pte = &pt->entries[pt_index];
 		if (pte->present) {
-			phys = pte->shifted_address;
-			phys <<= PAGE_SHIFT;
-			*(unsigned long *)pte = 0;
+			phys = pte->shifted_address << PAGE_SHIFT;
+			pte->present = 0;
 		}
 	}
 
 	return phys;
 }
 
-void x86_isr_page_fault(struct x86_trap_frame *frame, u32 error_code)
+void x86_isr_page_fault(trap_frame_t *frame, u32 error_code)
 {
 	void *address;
 	__asm__ volatile(
@@ -193,7 +196,7 @@ void x86_isr_page_fault(struct x86_trap_frame *frame, u32 error_code)
 	kprintf("\n########## B O N K ##########\n");
 	kprintf("Illegal %s %s%s address %p!\n", space, rwx, present, address);
-	x86_print_regs(frame);
+	print_regs(frame);
 	kprintf("system halted");
 	__asm__ volatile(
 "	cli	\n"
@@ -207,7 +210,7 @@ uintptr_t vtophys(void *virt)
 	usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;
 	usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;
 
-	struct x86_page_directory_entry *pde = &X86_CURRENT_PD->entries[pd_index];
+	struct x86_page_directory_entry *pde = X86_CURRENT_PDE(pd_index);
 	if (!pde->present)
 		return 0;
 
@@ -217,8 +220,7 @@
 		phys <<= PAGE_SHIFT; /* attention, this is not HUGEPAGE_SHIFT */
 		phys |= (uintptr_t)virt & ~HUGEPAGE_MASK;
 	} else {
-		struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
-		struct x86_page_table_entry *pte = &pt->entries[pt_index];
+		struct x86_page_table_entry *pte = X86_CURRENT_PTE(pd_index, pt_index);
 		if (pte->present) {
 			phys = pte->shifted_address;
 			phys <<= PAGE_SHIFT;
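The pd_index/pt_index arithmetic used throughout this file is the usual 10/10/12 split of a 32-bit virtual address. A worked example with a made-up address:

void *virt = (void *)0xc01ff000;	/* made-up example address */
usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;	/* 0xc01ff / 1024 = 768 */
usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;	/* 0xc01ff % 1024 = 511 */
/* thanks to the recursive mapping, that entry is directly addressable: */
struct x86_page_table_entry *pte = X86_CURRENT_PTE(pd_index, pt_index);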
diff --git a/include/gay/mm.h b/include/gay/mm.h
index c2c0fc9..295d2ae 100644
--- a/include/gay/mm.h
+++ b/include/gay/mm.h
@@ -21,24 +21,25 @@
 #include
+#include
 #include
 
 /**
  * @brief Memory allocation flags passed to `kmalloc()`.
  */
-enum mm_flags {
+enum mflags {
 	/** @brief Physically contiguous memory for DMA. */
-	MM_CONTIG = (1 << 0),
+	M_CONTIG = (1 << 0),
 	/** @brief Use emergency memory reserves if necessary. */
-	MM_EMERG = (1 << 1),
+	M_EMERG = (1 << 1),
 	/** @brief Don't sleep during the allocation. */
-	MM_NOSLEEP = (1 << 2),
+	M_NOSLEEP = (1 << 2),
 	/** @brief Allocate userspace memory. */
-	MM_USER = (1 << 4),
+	M_USER = (1 << 4),
 	/** @brief Kernel memory */
-	MM_KERN = MM_CONTIG,
+	M_KERN = M_CONTIG,
 	/** @brief Allocate memory in atomic (irq) context. */
-	MM_ATOMIC = MM_EMERG | MM_NOSLEEP,
+	M_ATOMIC = M_EMERG | M_NOSLEEP,
 };
 
 /**
@@ -50,7 +51,7 @@ enum mm_flags {
  * @param flags Allocation flags
  * @returns The allocated memory area, or `NULL` if OOM
  */
-void *kmalloc(size_t size, enum mm_flags flags) __malloc_like __alloc_size(1);
+void *kmalloc(size_t size, enum mflags flags) __malloc_like __alloc_size(1);
 
 /**
  * @brief Release memory.
@@ -59,16 +60,30 @@ void *kmalloc(size_t size, enum mm_flags flags) __malloc_like __alloc_size(1);
  */
 void kfree(void *ptr);
 
+/**
+ * @brief Flags for the paging structures.
+ *
+ * The macros with two underscores in front of them are defined in `arch/page.h`
+ * and match the respective bit positions in the platform's native hardware
+ * layout for better performance (no shifting around required).
+ */
 enum pflags {
-	PFLAG_PRESENT	= __PFLAG_PRESENT,
-	PFLAG_RW	= __PFLAG_RW,
-	PFLAG_USER	= __PFLAG_USER,
-	PFLAG_ACCESSED	= __PFLAG_ACCESSED,
-	PFLAG_DIRTY	= __PFLAG_DIRTY,
-	PFLAG_GLOBAL	= __PFLAG_GLOBAL,
-	PFLAG_NOCACHE	= __PFLAG_NOCACHE,
+	P_PRESENT	= __PFLAG_PRESENT,	/**< @brief Page exists */
+	P_RW		= __PFLAG_RW,		/**< @brief Page is writable */
+	P_USER		= __PFLAG_USER,		/**< @brief Page is accessible from ring 3 */
+	P_ACCESSED	= __PFLAG_ACCESSED,	/**< @brief Page has been accessed */
+	P_DIRTY		= __PFLAG_DIRTY,	/**< @brief Page has been written */
+	P_GLOBAL	= __PFLAG_GLOBAL,	/**< @brief The entry survives `vm_flush()` */
+	P_NOCACHE	= __PFLAG_NOCACHE,	/**< @brief The TLB won't cache this entry */
+	P_SLAB		= __PFLAG_SLAB,		/**< @brief Page is used by the slab allocator */
+	P_NOSLEEP	= __PFLAG_ATOMIC,	/**< @brief Page is atomic */
 #ifdef __HAVE_HUGEPAGES
-	PFLAG_HUGE	= __PFLAG_HUGE,
+	/** @brief This page is `HUGEPAGE_SIZE` bytes long, rather than `PAGE_SIZE` */
+	P_HUGE		= __PFLAG_HUGE,
+#endif
+#ifdef __HAVE_NOEXEC
+	/** @brief No instructions can be fetched from this page */
+	P_NOEXEC	= __PFLAG_NOEXEC,
 #endif
 };
 
@@ -77,7 +92,7 @@ enum pflags {
  * set up. Don't ever use these anywhere, because they *will* break everything.
  */
 void __early_map_page(uintptr_t phys, void *virt, enum pflags flags);
-/* This just shrinks the usable physical area by PAGE_SIZE and returns the page */
+/* This just shrinks phys_end by PAGE_SIZE and returns the page */
 uintptr_t __early_get_page(void);
 
 /**
@@ -134,18 +149,21 @@ extern uintptr_t phys_end;
 int pages_init(void);
 
 /**
- * @brief Allocate and map a contiguous region in physical memory.
- * The physical region will be mapped to its corresponding virtual address
- * between `DMAP_START` and `DMAP_END`, such that the physical address can be
- * calculated with `ptr - DMAP_OFFSET`.
- *
- * @param count Number of contiguous pages to allocate
- * @param flags
- * @return
+ * @brief Allocate a contiguous region in physical memory.
+ * The returned region will be `(1 << order) * PAGE_SIZE` bytes long.
+ *
+ * @param order Order of magnitude (as in `1 << order`) for the region size
+ * @param flags How to allocate (`order` must be 0 if `M_NOSLEEP` is specified)
+ * @return A pointer to the beginning of the region in the direct mapping area,
+ *         or `nil` if the allocation failed
  */
-void *get_pages(usize count, enum mflags flags) __malloc_like;
-#define GET_PAGE_LEVELS (HUGEPAGE_SHIFT - PAGE_SHIFT + 1)
-#define GET_PAGE_MAXCOUNT (1 << (HUGEPAGE_SHIFT - PAGE_SHIFT))
+void *get_pages(int order, enum mflags flags) __malloc_like;
+#ifdef __HAVE_HUGEPAGES
+#define GET_PAGE_ORDERS (HUGEPAGE_SHIFT - PAGE_SHIFT + 1)
+#else
+#define GET_PAGE_ORDERS 10
+#endif
+#define GET_PAGE_MAX_ORDER (GET_PAGE_ORDERS - 1)
 
 void free_pages(void *ptr);
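To illustrate the new order parameter (this snippet is not part of the header): order 0 is a single page, order 2 is four physically contiguous pages, and anything above GET_PAGE_MAX_ORDER fails.

void *buf = get_pages(2, M_KERN);	/* (1 << 2) * PAGE_SIZE = 16 KiB with 4 KiB pages */
if (buf != nil)
	free_pages(buf);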
@@ -156,19 +174,6 @@ void free_pages(void *ptr);
 /**
  * @brief Initialize the slab allocator.
  */
 void slab_init(void);
 
-/**
- * @brief Allocate contiguous memory from the slab caches.
- * This is only used internally by `kmalloc()` and for relatively small
- * objects (<< PAGE_SIZE). If you need memory, use `kmalloc()` instead.
- *
- * @param size Requested memory size
- * @param flags Flags that are passed to `get_pages` for creating new caches
- * @return The allocated pointer, or `nil` if OOM or `size` was too big
- */
-void *slab_alloc(usize size, enum mm_flags flags) __malloc_like __alloc_size(1);
-
-void slab_free(void *ptr);
-
 /**
  * @brief Return where a physical address maps to in the direct memory area.
  * The returned pointer will be within the range `DMAP_START` (inclusive)
 *
@@ -177,10 +182,12 @@ void slab_free(void *ptr);
  * @param phys Physical address
  * @return Virtual address
  */
-static __always_inline void *__v(uintptr_t phys)
+static inline void *__v(uintptr_t phys)
 {
 # ifdef DEBUG
-	if (phys > phys_end)
+	if (phys > phys_end) {
+		kprintf("__v(%p): phys ptr out of range!\n", (void *)phys);
 		return nil;
+	}
 # endif
 	return (void *)phys + DMAP_OFFSET;
 }
@@ -197,11 +203,13 @@
  * @return The physical address, i.e. `virt - DMAP_OFFSET`
  * @see vtophys()
  */
-static __always_inline uintptr_t __p(void *virt)
+static inline uintptr_t __p(void *virt)
 {
 # ifdef DEBUG
-	if (virt < DMAP_START || virt >= DMAP_END)
+	if (virt < DMAP_START || virt >= DMAP_END) {
+		kprintf("__p(%p): virt ptr out of range!\n", virt);
 		return 0;
+	}
 # endif
 	return (uintptr_t)virt - DMAP_OFFSET;
 }
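A short sketch of how the two helpers relate; the physical address below is made up, but any value under phys_end behaves the same way:

uintptr_t phys = 0x00200000;	/* made-up physical address below phys_end */
void *kvirt = __v(phys);	/* phys + DMAP_OFFSET */
KASSERT(__p(kvirt) == phys);	/* __p() is the exact inverse of __v() */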
diff --git a/include/stdlib.h b/include/stdlib.h
index fb2342a..a6a8e34 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -7,7 +7,7 @@
 #ifdef _KERNEL
 #include
-#define malloc(size) kmalloc(size, MM_KERN)
+#define malloc(size) kmalloc(size, M_KERN)
 #define free(ptr) kfree(ptr)
 #else
 /*
diff --git a/kernel/mm/kmalloc.c b/kernel/mm/kmalloc.c
index 147d2c2..4a36dd1 100644
--- a/kernel/mm/kmalloc.c
+++ b/kernel/mm/kmalloc.c
@@ -58,6 +58,21 @@ int kmalloc_init(uintptr_t _phys_start, uintptr_t _phys_end)
 	return 0;
 }
 
+__weak void *malloc(usize size)
+{
+	return kmalloc(size, M_KERN);
+}
+
+__weak void free(void *ptr)
+{
+	kfree(ptr);
+}
+
+/*
+ * Looking for kmalloc() and kfree()?
+ * Those two are in slab.c for purely organizational reasons.
+ */
+
 /*
  * This file is part of GayBSD.
  * Copyright (c) 2021 fef .
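Together with the stdlib.h macros above, these weak definitions mean that kernel code (or bundled third-party code) calling plain malloc()/free() ends up in the kernel allocator. A trivial usage sketch:

char *buf = malloc(64);	/* same as kmalloc(64, M_KERN) */
if (buf != nil)
	free(buf);	/* same as kfree(buf) */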
diff --git a/kernel/mm/page.c b/kernel/mm/page.c
index 79741a6..d280bde 100644
--- a/kernel/mm/page.c
+++ b/kernel/mm/page.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -37,6 +38,7 @@
 #endif
 
 #if CFG_DEBUG_PAGE_ALLOCS
+# define PAGE_ASSERT(x) KASSERT(x)
 # define page_debug(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
 # if CFG_DEBUG_PAGE_ALLOCS_NOISY
 # define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
@@ -44,6 +46,7 @@
 # define page_debug_noisy(msg, ...) ({})
 # endif
 #else
+# define PAGE_ASSERT(x) ({})
 # define page_debug(msg, ...) ({})
 # define page_debug_noisy(msg, ...) ({})
 #endif
 
@@ -54,14 +57,14 @@
  * the one below it, starting at one page per entry. The effective result is
  * that a single entry in the cache on level L covers `(1 << L)` pages.
  */
-#define CACHE_LEVELS GET_PAGE_LEVELS
+#define CACHE_ORDERS GET_PAGE_ORDERS
 
-#define LEVEL_SHIFT(level) (PAGE_SHIFT + (level))
+#define ORDER_SHIFT(order) (PAGE_SHIFT + (order))
 
-/** @brief There is one of this for every cache level. */
+/** @brief There is one of these for every cache order. */
 struct cache_pool {
 	/**
-	 * @brief List of free blocks on this level of granularity.
+	 * @brief List of free blocks on this order of granularity.
 	 * The individual entries sit right at the beginning of each free block,
 	 * and are always aligned to `entry_size` bytes.
 	 */
@@ -74,11 +77,9 @@ struct cache_pool {
 	/** @brief Number of items in `freelist`. */
 	usize free_entries;
 };
-static struct cache_pool caches[CACHE_LEVELS];
+static struct cache_pool caches[CACHE_ORDERS];
 static MTX(caches_lock);
 
-#define LONG_BIT_MASK (~(LONG_BIT - 1))
-
 /* these get set in kmalloc_init() */
 uintptr_t phys_start;
 uintptr_t phys_end;
@@ -91,28 +92,48 @@ uintptr_t __early_get_page(void)
 
 static int sanity_check(void)
 {
+	KASSERT(phys_start < phys_end);
+	KASSERT(phys_start == HUGEPAGE_ALIGN(phys_start));
 	/* phys_end is only page aligned, see kmalloc_init() */
-	if (phys_end != PAGE_ALIGN(phys_end) || phys_start != HUGEPAGE_ALIGN(phys_start)) {
-		kprintf("Unaligned memory, this should never be possible\n");
-		return 1;
-	}
+	KASSERT(phys_end == PAGE_ALIGN(phys_end));
 
 	if ((phys_end - phys_start) < (32 * 1024 * 1024)) {
 		kprintf("Less than 32 MB of usable RAM, this wouldn't go well\n");
 		return 1;
 	}
 
-	if (phys_start > phys_end) {
-		kprintf("Hey, this is funny. pages_init() was called with parameters "
-			"such that phys_start > phys_end (%p > %p), which "
-			"should absolutely never be possible. I can't really continue "
-			"like this, so have a nice day.\n", (void *)phys_start, (void *)phys_end);
-		return 1;
-	}
-
 	return 0;
 }
 
+/*
+ * Map the entire physical memory into the direct contiguous area.
+ * __early_map_page() might call __early_get_page() in order to allocate
+ * new page table structures, which in turn shrinks the physical memory
+ * size (see above).
+ */
+static inline void map_direct_area(void)
+{
+#ifdef __HAVE_HUGEPAGES
+	const usize step = HUGEPAGE_SIZE;
+	const enum pflags flags = P_PRESENT | P_RW | P_HUGE;
+#else
+	const usize step = PAGE_SIZE;
+	const enum pflags flags = P_PRESENT | P_RW;
+#endif
+
+	/*
+	 * It might be necessary to use a volatile pointer to phys_end for this
+	 * loop in case clang does The Optimization and caches its value for
+	 * whatever reason, even though at least for x86 this is not the case
+	 * (and i don't even think the C standard allows it when calling
+	 * external functions in between, but still, Never Trust The Compiler).
+	 */
+	for (uintptr_t pos = phys_start; pos <= phys_end - step; pos += step)
+		__early_map_page(pos, __v(pos), flags);
+
+	vm_flush();
+}
+
 /*
  * This function maps the entire physical memory into the direct region
  * (DMAP_START - DMAP_END) and sets up the caches.
@@ -124,18 +145,7 @@ int pages_init(void)
 	if (sanity_check() != 0)
 		return 1;
 
-	/*
-	 * Map the entire physical memory into the direct contiguous area.
-	 * __early_map_page() might call __early_get_page() in order to allocate
-	 * new page table structures, which in turn shrinks the physical memory
-	 * size (see above).
-	 * It might be necessary to use a volatile pointer to phys_end for this
-	 * loop in case clang does The Optimization and caches its value for
-	 * whatever reason, even though at least for x86 this is not the case.
-	 */
-	for (uintptr_t physptr = phys_start; physptr < phys_end; physptr += HUGEPAGE_SIZE)
-		__early_map_page(physptr, __v(physptr), PFLAG_HUGE | PFLAG_RW | PFLAG_GLOBAL);
-	vm_flush();
+	map_direct_area();
 
 	/* phys_end gets aligned, as promised by the comment in kmalloc_init() */
 	phys_end = align_floor(phys_end, HUGEPAGE_SIZE);
@@ -145,13 +155,9 @@
 	 * calculate the size of each bitmap, as well as their combined size
 	 */
 	usize bitmap_bytes = 0;
-	for (int i = 0; i < CACHE_LEVELS; i++) {
-		usize bits = phys_size >> LEVEL_SHIFT(i);
-		/* round up to the next full long */
-		if (bits & ~LONG_BIT_MASK) {
-			bits &= LONG_BIT_MASK;
-			bits += LONG_BIT;
-		}
+	for (int i = 0; i < CACHE_ORDERS; i++) {
+		usize bits = phys_size >> ORDER_SHIFT(i);
+		bits = align_ceil(bits, LONG_BIT);
 		bitmap_bytes += bits / 8;
 	}
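As a sanity check on the bitmap sizing, here is the loop above spelled out for a hypothetical 1 GiB of usable memory, assuming 4 KiB pages, 4 MiB huge pages (so CACHE_ORDERS == 11) and 32-bit longs:

/* hypothetical numbers, for illustration only */
usize phys_size = 1024 * 1024 * 1024;
usize bitmap_bytes = 0;
for (int i = 0; i < 11; i++) {
	usize bits = phys_size >> (12 + i);	/* 262144, 131072, ..., 256 */
	bits = align_ceil(bits, 32);		/* already multiples of 32 here */
	bitmap_bytes += bits / 8;		/* total: 65504 bytes, just under 64 KiB */
}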
@@ -169,11 +175,11 @@
 	/*
 	 * preallocate entries that can't be handed out (i.e. the cache bitmaps)
 	 */
 	unsigned long *bitmap_pos = bitmap_start;
-	for (int i = 0; i < CACHE_LEVELS; i++) {
+	for (int i = 0; i < CACHE_ORDERS; i++) {
 		/* total amount of entries on this level */
-		usize total_bits = phys_size >> LEVEL_SHIFT(i);
+		usize total_bits = phys_size >> ORDER_SHIFT(i);
 		/* number of entries on this level that the bitmap itself takes up */
-		usize wasted_bits = bitmap_bytes >> LEVEL_SHIFT(i);
+		usize wasted_bits = bitmap_bytes >> ORDER_SHIFT(i);
 		if (wasted_bits == 0)
 			wasted_bits = 1;
 		bit_set_range(bitmap_pos, total_bits - wasted_bits, wasted_bits);
@@ -190,11 +196,11 @@
 	kheap_end = align_floor(bitmap_start, HUGEPAGE_SIZE);
 
 	/*
-	 * populate the freelist on the highest level, all levels beneath it
+	 * populate the freelist on the highest order, all orders beneath it
 	 * stay empty until one of the large blocks gets split up
 	 */
-	struct cache_pool *high_pool = &caches[CACHE_LEVELS - 1];
-	usize step = 1 << LEVEL_SHIFT(CACHE_LEVELS - 1);
+	struct cache_pool *high_pool = &caches[CACHE_ORDERS - 1];
+	usize step = 1 << ORDER_SHIFT(CACHE_ORDERS - 1);
 	for (void *pos = kheap_start; pos < kheap_end; pos += step) {
 		struct clist *entry = pos;
 		clist_add(&high_pool->freelist, entry);
@@ -218,62 +224,62 @@ static void *split_buddy(void *ptr, int level);
 
 /**
  * @brief Attempt to coalesce a block with its buddy.
- * If coalition is possible, the buddy is removed from its freelist at `level`.
+ * If coalition is possible, the buddy is removed from its freelist at `order`.
  *
  * @param ptr Pointer to the block
- * @param level Cache level, must be less than `CACHE_LEVELS - 1` (because you
- *        can't join blocks at the highest cache level)
+ * @param order Cache order, must be less than `CACHE_ORDERS - 1` (because you
+ *        can't join blocks at the highest cache order)
 * @return The joined block, or `nil` if coalition was not possible
 */
-static void *try_join_buddy(void *ptr, int level);
+static void *try_join_buddy(void *ptr, int order);
 
-static inline usize get_bit_number(void *ptr, int level)
+static inline usize get_bit_number(void *ptr, int order)
 {
-	return ((uintptr_t)ptr - (uintptr_t)kheap_start) >> LEVEL_SHIFT(level);
+	return ((uintptr_t)ptr - (uintptr_t)kheap_start) >> ORDER_SHIFT(order);
 }
 
-void *get_pages(usize count, enum mm_flags flags)
+void *get_pages(int order, enum mflags flags)
 {
-	int level;
-	for (level = 0; level < CACHE_LEVELS; level++) {
-		if ((1 << level) >= count)
-			break;
-	}
-	if (level == CACHE_LEVELS) {
-		page_debug("get_pages(%zu, %08x): count too large!\n", count, flags);
+	PAGE_ASSERT(order >= 0);
+
+	if (order >= GET_PAGE_ORDERS) {
+		page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
 		return nil;
 	}
 
-	if (flags & MM_NOSLEEP) {
-		kprintf("get_pages(): MM_NOSLEEP requested, this is not implemented yet :(\n");
+	if (flags & M_NOSLEEP) {
+		kprintf("get_pages(): M_NOSLEEP requested, this is not implemented yet :(\n");
 		return nil;
 	}
 
 	mtx_lock(&caches_lock);
 	struct clist *entry = nil;
-	int entry_level;
-	for (entry_level = level; entry_level < CACHE_LEVELS; entry_level++) {
-		if (caches[entry_level].free_entries > 0) {
-			entry = caches[entry_level].freelist.next;
+	int entry_order;
+	for (entry_order = order; entry_order < CACHE_ORDERS; entry_order++) {
+		if (caches[entry_order].free_entries > 0) {
+			entry = caches[entry_order].freelist.next;
 			break;
 		}
 	}
-	if (entry_level == CACHE_LEVELS)
-		goto unlock;
-
-	clist_del(entry);
-	caches[entry_level].free_entries--;
-
-	usize bit_number = get_bit_number(entry, entry_level);
-	while (entry_level > level) {
-		entry = split_buddy(entry, entry_level);
-		bit_set(caches[entry_level].bitmap, bit_number);
-		entry_level--;
-		bit_number <<= 1;
+
+	if (entry_order != CACHE_ORDERS) {
+		clist_del(entry);
+		caches[entry_order].free_entries--;
+
+		usize bit_number = get_bit_number(entry, entry_order);
+		while (entry_order > order) {
+			entry = split_buddy(entry, entry_order);
+			bit_set(caches[entry_order].bitmap, bit_number);
+			entry_order--;
+			bit_number <<= 1;
+		}
+		bit_set(caches[order].bitmap, bit_number);
+
+# if CFG_POISON_PAGES
+		memset(entry, 'a', 1 << ORDER_SHIFT(order));
+# endif
 	}
-	bit_set(caches[level].bitmap, bit_number);
 
-unlock:
 	mtx_unlock(&caches_lock);
 	return (void *)entry;
 }
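To make the split loop above concrete: a low-order request that has to be served from a larger block walks down one order per iteration, and the buddy bitmap bit number doubles at each step.

/* e.g. if only an order-3 block is free, this order-0 request splits it three
 * times; the three cut-off buddies end up on the order 2, 1 and 0 freelists */
void *page = get_pages(0, M_KERN);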
@@ -287,54 +293,66 @@ void free_pages(void *ptr)
 	}
 # endif
 
-	mtx_lock(&caches_lock);
+	if (sus_nil(ptr)) {
+		page_debug("free_pages(%p): tried to free NULL!\n", ptr);
+		return;
+	}
 
-	int level = 0;
-	usize bit_number = get_bit_number(ptr, level);
-	for (; level < CACHE_LEVELS; level++) {
-		if (bit_tst(caches[level].bitmap, bit_number))
+	int order = 0;
+	usize bit_number = get_bit_number(ptr, order);
+	for (; order < CACHE_ORDERS; order++) {
+		if (bit_tst(caches[order].bitmap, bit_number))
 			break;
 		bit_number >>= 1;
 	}
 
-	if (level == CACHE_LEVELS) {
+	if (order == CACHE_ORDERS) {
 		page_debug("free_pages(%p): double free!\n", ptr);
-		goto unlock;
+		return;
 	}
+	int original_order = order;
 
-	while (level < CACHE_LEVELS - 1) {
-		bit_clr(caches[level].bitmap, bit_number);
+	mtx_lock(&caches_lock);
+
+	while (order < CACHE_ORDERS - 1) {
+		bit_clr(caches[order].bitmap, bit_number);
 
-		void *tmp = try_join_buddy(ptr, level);
+		void *tmp = try_join_buddy(ptr, order);
 		if (tmp == nil)
 			break;
 		ptr = tmp;
-		level++;
+		order++;
 		bit_number >>= 1;
 	}
 
-	clist_add(&caches[level].freelist, (struct clist *)ptr);
-	caches[level].free_entries++;
+	if (order == CACHE_ORDERS - 1 && original_order != CACHE_ORDERS - 1)
+		set_pflags(HUGEPAGE_ALIGN(ptr), P_HUGE | P_RW);
+
+#if CFG_POISON_PAGES
+	memset(ptr, 'A', 1 << ORDER_SHIFT(order));
+#endif
+
+	clist_add(&caches[order].freelist, (struct clist *)ptr);
+	caches[order].free_entries++;
 
-unlock:
 	mtx_unlock(&caches_lock);
 }
 
 static inline void *split_buddy(void *ptr, int level)
 {
 # if CFG_DEBUG_PAGE_ALLOCS
-	if ((uintptr_t)ptr % (1 << LEVEL_SHIFT(level))) {
+	if ((uintptr_t)ptr % (1 << ORDER_SHIFT(level))) {
 		kprintf("split_buddy(ptr = %p, level = %d): unaligned ptr!\n", ptr, level);
 		return nil;
 	}
-	if (level < 1 || level >= CACHE_LEVELS) {
+	if (level < 1 || level >= CACHE_ORDERS) {
 		kprintf("split_buddy(ptr = %p, level = %d): invalid level!\n", ptr, level);
 		return nil;
 	}
# endif
 
-	struct clist *high_buddy = ptr + (1 << LEVEL_SHIFT(level - 1));
+	struct clist *high_buddy = ptr + (1 << ORDER_SHIFT(level - 1));
 	clist_add(&caches[level - 1].freelist, high_buddy);
 	caches[level - 1].free_entries++;
 
@@ -343,19 +361,19 @@ static inline void *split_buddy(void *ptr, int level)
 	return ptr;
 }
 
-static void *try_join_buddy(void *ptr, int level)
+static void *try_join_buddy(void *ptr, int order)
 {
-	const usize entry_size = 1 << LEVEL_SHIFT(level);
+	const usize entry_size = 1 << ORDER_SHIFT(order);
 
 # if CFG_DEBUG_PAGE_ALLOCS
 	if ((uintptr_t)ptr % entry_size) {
-		kprintf("try_join_buddy(%p, %d): unaligned ptr!\n", ptr, level);
+		kprintf("try_join_buddy(%p, %d): unaligned ptr!\n", ptr, order);
 		return nil;
 	}
-	/* level must be < CACHE_LEVELS - 1 because you
-	 * can't join blocks on the topmost level */
-	if (level >= CACHE_LEVELS - 1) {
-		kprintf("try_join_buddy(%p, %d): level >= CACHE_LEVELS - 1!\n", ptr, level);
+	/* order must be < CACHE_ORDERS - 1 because you
+	 * can't join blocks on the topmost order */
+	if (order >= CACHE_ORDERS - 1) {
+		kprintf("try_join_buddy(%p, %d): order >= CACHE_ORDERS - 1!\n", ptr, order);
 		return nil;
 	}
 # endif
@@ -367,15 +385,15 @@ static void *try_join_buddy(void *ptr, int level)
 	 * for any if branches.
 	 */
 	uintptr_t buddy = (uintptr_t)ptr ^ entry_size;
-	usize buddy_bitnum = get_bit_number((void *)buddy, level);
-	if (bit_tst(caches[level].bitmap, buddy_bitnum))
+	usize buddy_bitnum = get_bit_number((void *)buddy, order);
+	if (bit_tst(caches[order].bitmap, buddy_bitnum))
 		return nil;
 
-	page_debug_noisy("join (%p:%p), lvl=%d\n", ptr, (void *)buddy, level);
+	page_debug_noisy("join (%p:%p), order=%d\n", ptr, (void *)buddy, order);
 
 	/* If the buddy is free, we remove it from the freelist ... */
 	clist_del((struct clist *)buddy);
-	caches[level].free_entries--;
+	caches[order].free_entries--;
 
 	/*
 	 * ... and return a pointer to the coalesced block.
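The XOR in try_join_buddy() is worth spelling out once: at a given order, the buddy of a block is the block whose offset from kheap_start differs in exactly one bit, namely bit ORDER_SHIFT(order). A made-up example:

/* hypothetical numbers: order 1, so entry_size == 0x2000 */
void *ptr = kheap_start + 0x6000;
uintptr_t buddy = (uintptr_t)ptr ^ 0x2000;	/* == kheap_start + 0x4000 */
/* and the buddy of kheap_start + 0x4000 is kheap_start + 0x6000 again */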
diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c
index 2fd55f8..9ca760d 100644
--- a/kernel/mm/slab.c
+++ b/kernel/mm/slab.c
@@ -53,7 +53,6 @@ struct slab {
 
 #define SLAB_STEP (sizeof(struct clist))
 #define SLAB_OVERHEAD (sizeof(struct slab))
-#define SLAB_EFFECTIVE_SIZE (SLAB_SIZE - SLAB_OVERHEAD)
 #define SLAB_MAX_ALLOC (SLAB_SIZE - SLAB_OVERHEAD)
 /* slabs are always aligned ... */
 #define SLAB_PTR_MASK (~(SLAB_SIZE - 1))
 
@@ -65,18 +64,60 @@ struct slab {
 # if CFG_DEBUG_SLAB_ALLOCS_NOISY
 # define slab_debug_noisy(msg, ...) kprintf("[slab] " msg, ##__VA_ARGS__)
 # else
-# define slab_debug_noisy(msg, ...)
+# define slab_debug_noisy(msg, ...) ({})
 # endif
 #else
-# define slab_debug(msg, ...)
-# define slab_debug_noisy(msg, ...)
+# define slab_debug(msg, ...) ({})
+# define slab_debug_noisy(msg, ...) ({})
 #endif
 
-/** @brief All slab pools, indexed by `entry_size / SLAB_STEP - 1` */
+/** @brief All slabs grouped by entry_size, indexed by `entry_size / SLAB_STEP - 1` */
 struct clist pools[SLAB_MAX_ALLOC / SLAB_STEP];
 
-static struct slab *slab_create(unsigned int entry_size, enum mm_flags flags);
-static usize round_size(usize size);
+static void *slab_alloc(usize size, enum mflags flags);
+static void slab_free(void *ptr);
+
+static struct slab *slab_create(unsigned int entry_size, enum mflags flags);
+
+static inline int get_order(usize size)
+{
+	int order;
+	usize order_size = PAGE_SIZE;
+
+	for (order = 0; order <= GET_PAGE_MAX_ORDER; order++) {
+		if (order_size >= size)
+			break;
+		order_size <<= 1;
+	}
+
+	return order;
+}
+
+void *kmalloc(usize size, enum mflags flags)
+{
+	if (size > SLAB_MAX_ALLOC) {
+		if (flags & M_CONTIG) {
+			int order = get_order(size);
+			if (order > GET_PAGE_MAX_ORDER) {
+				slab_debug("Requested alloc size %zu too large for get_pages()\n",
+					size);
+				return nil;
+			} else {
+				return get_pages(order, flags);
+			}
+		} else {
+			slab_debug("Refusing to allocate %zu bytes as slabs\n", size);
+			return nil;
+		}
+	} else {
+		return slab_alloc(size, flags);
+	}
+}
+
+void kfree(void *ptr)
+{
+	kprintf("kfree() is not implemented yet lmao\n");
+}
 
 void slab_init(void)
 {
@@ -86,10 +127,10 @@ void slab_init(void)
 		clist_init(&pools[i]);
 }
 
-void *slab_alloc(usize size, enum mm_flags flags)
+static inline void *slab_alloc(usize size, enum mflags flags)
 {
-	size = round_size(size);
-	if (size == 0)
+	size = align_ceil(size, SLAB_STEP);
+	if (size == 0 || size > SLAB_MAX_ALLOC)
 		return nil;
 
 	struct clist *pool = &pools[size / SLAB_STEP - 1];
@@ -119,7 +160,7 @@ void *slab_alloc(usize size, enum mm_flags flags)
 	return (void *)ret;
 }
 
-void slab_free(void *ptr)
+static inline void slab_free(void *ptr)
 {
 # if CFG_DEBUG_SLAB_ALLOCS
 	if (ptr < kheap_start || ptr >= kheap_end) {
@@ -138,7 +179,7 @@
 	memset(ptr, 'A', slab->entry_size);
 # endif
 
-	if (slab->free_entries * slab->entry_size + slab->entry_size > SLAB_EFFECTIVE_SIZE) {
+	if (slab->free_entries * slab->entry_size + slab->entry_size > SLAB_MAX_ALLOC) {
 		/* none of the entries are in use, free the slab */
 		slab_debug_noisy("Destroying empty cache of size %zu\n", slab->entry_size);
 		free_pages(slab);
@@ -147,7 +188,7 @@
 	}
 }
 
-static struct slab *slab_create(unsigned int entry_size, enum mm_flags flags)
+static struct slab *slab_create(unsigned int entry_size, enum mflags flags)
 {
 	slab_debug_noisy("Creating new cache for size %zu\n", entry_size);
 	struct slab *slab = get_pages(SLAB_SIZE / PAGE_SIZE, flags);
@@ -168,19 +209,6 @@ static struct slab *slab_create(unsigned int entry_size, enum mm_flags flags)
 	return slab;
 }
 
-static inline usize round_size(usize size)
-{
-	if (size > SLAB_MAX_ALLOC)
-		return 0;
-
-	/* SLAB_STEP is a power of 2, so clang will (hopefully)
-	 * replace these with fancy bit banging tricks */
-	if (size % SLAB_STEP)
-		size = (size / SLAB_STEP) * SLAB_STEP + SLAB_STEP;
-
-	return size;
-}
-
 /*
  * This file is part of GayBSD.
 * Copyright (c) 2021 fef .
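A short sketch of how kmalloc() now routes requests, with made-up sizes (assuming 96 <= SLAB_MAX_ALLOC < 3 * PAGE_SIZE on this configuration):

void *small = kmalloc(96, M_KERN);		/* fits a slab: handled by slab_alloc() */
void *large = kmalloc(3 * PAGE_SIZE, M_CONTIG);	/* too big for a slab: get_order() returns 2,
						 * so this becomes get_pages(2, M_CONTIG) */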