mm/page: yet another overhaul
This is primarily in preparation for the upcoming slab allocator update. To be honest, I'm not entirely sure myself what should go into this commit: I took a longer break, and there are about 40 modified files that are all interlinked.
parent b4ed811920
commit 30df044cec
2 changed files with 78 additions and 42 deletions
@@ -20,6 +20,7 @@ union vm_page_attr {
         bool pcpu:1;     /**< @brief Page is in a per-cpu cache */
         bool slab:1;     /**< @brief Page is used by the slab allocator */
         unsigned zone:2; /**< @brief Index into `mm_zones` */
+        bool zero:1;     /**< @brief Page is known to contain only zeroes */
     };
 };
 #define _PGA_ORDER_SHIFT 0
@@ -34,6 +35,8 @@ union vm_page_attr {
 #define _PGA_SLAB_MASK  (1 << _PGA_SLAB_SHIFT)
 #define _PGA_ZONE_SHIFT 12
 #define _PGA_ZONE_MASK  (3 << _PGA_ZONE_SHIFT)
+#define _PGA_ZERO_SHIFT 13
+#define _PGA_ZERO_MASK  (1 << _PGA_ZERO_SHIFT)
 
 typedef union vm_page_attr vm_page_attr_t;
 
@@ -50,7 +53,7 @@ struct vm_page {
     atom_t count;
     /** @brief Page attributes, use the macros below to access this */
     atom_t attr;
-    /** @brief Page frame number */
+    /** @brief Page frame number (= `paddr >> PAGE_SHIFT`) */
     u_long pfn;
     /**
      * @brief If the page is free, this is its freelist.
@@ -113,6 +116,12 @@ static inline bool pga_slab(vm_page_t page)
     return attr.slab;
 }
 
+static inline bool pga_zero(vm_page_t page)
+{
+    union vm_page_attr attr = { ._val = atom_read(&page->attr) };
+    return attr.zero;
+}
+
 static inline enum mm_zone_type pga_zone(vm_page_t page)
 {
     union vm_page_attr attr = { ._val = atom_read(&page->attr) };
@@ -157,6 +166,14 @@ static inline enum mm_zone_type pga_set_zone(vm_page_t page, enum mm_zone_type z
     }
 }
 
+static inline bool pga_set_zero(vm_page_t page, bool zero)
+{
+    if (zero)
+        return atom_set_bit(&page->attr, _PGA_ZERO_SHIFT);
+    else
+        return atom_clr_bit(&page->attr, _PGA_ZERO_SHIFT);
+}
+
 static __always_inline bool page_get(vm_page_t page)
 {
     return atom_inc(&page->count);
@@ -172,7 +189,7 @@ static __always_inline bool page_put(vm_page_t page)
 static inline void page_lock(vm_page_t page)
 {
     spin_loop {
-        if (atom_set_bit(&page->attr, _PGA_LOCK_SHIFT))
+        if (!atom_set_bit(&page->attr, _PGA_LOCK_SHIFT))
             break;
     }
 }
@@ -182,9 +199,16 @@ static __always_inline void page_unlock(vm_page_t page)
     atom_clr_bit(&page->attr, _PGA_LOCK_SHIFT);
 }
 
+/**
+ * @brief Attempt to lock a page.
+ * Must be called with interrupts disabled.
+ *
+ * @param page Page to lock.
+ * @return `true` if you claimed the lock, `false` if not.
+ */
 static __always_inline bool page_trylock(vm_page_t page)
 {
-    return atom_set_bit(&page->attr, _PGA_LOCK_SHIFT);
+    return !atom_set_bit(&page->attr, _PGA_LOCK_SHIFT);
 }
 
 static inline void __page_set_flag(vm_page_t page, unsigned flag)
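Side note on the two lock fixes above: they only make sense if atom_set_bit() returns the *previous* value of the bit, so a return of false means the caller just claimed it. That contract is inferred from this diff, not stated anywhere in it. A minimal userspace sketch of the assumed semantics using C11 atomics, with invented names:

/* Hypothetical stand-ins for atom_set_bit()/atom_clr_bit(), assuming they
 * return the bit's previous value. Compile with -std=c11; illustration only. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool set_bit_return_old(atomic_uint *word, unsigned shift)
{
    unsigned old = atomic_fetch_or(word, 1u << shift);
    return (old >> shift) & 1u;   /* true = bit was already set */
}

static bool clr_bit_return_old(atomic_uint *word, unsigned shift)
{
    unsigned old = atomic_fetch_and(word, ~(1u << shift));
    return (old >> shift) & 1u;
}

#define LOCK_SHIFT 8  /* arbitrary example bit position */

/* trylock succeeds only if the bit was previously clear, which is
 * why the kernel code negates the return value of atom_set_bit() */
static bool trylock(atomic_uint *attr)
{
    return !set_bit_return_old(attr, LOCK_SHIFT);
}

int main(void)
{
    atomic_uint attr = 0;
    printf("first trylock:  %d\n", trylock(&attr));  /* 1: claimed */
    printf("second trylock: %d\n", trylock(&attr));  /* 0: already held */
    clr_bit_return_old(&attr, LOCK_SHIFT);           /* unlock */
    printf("third trylock:  %d\n", trylock(&attr));  /* 1: claimed again */
    return 0;
}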
@@ -275,3 +299,9 @@ static inline void *pfn2vaddr(u_long pfn)
     PGADDR_ASSERT(&vm_page_array[pfn] < _vm_page_array_end);
     return DMAP_START + (pfn << PAGE_SHIFT);
 }
+
+__pure2
+static inline vm_paddr_t pg2paddr(vm_page_t page)
+{
+    return (vm_paddr_t)page->pfn << PAGE_SHIFT;
+}
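The conversions above are pure shifts: a PFN shifted left by PAGE_SHIFT gives the physical address, and the direct map places that physical address at a fixed virtual offset. A standalone sketch of the same arithmetic with made-up constants (PAGE_SHIFT = 12 and a fake direct-map base, not the kernel's real values):

/* Standalone illustration of pfn <-> paddr <-> direct-map vaddr arithmetic. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12                     /* 4 KiB pages (assumed) */
#define DMAP_BASE  0xffff800000000000ul   /* hypothetical direct-map start */

static uint64_t pfn_to_paddr(uint64_t pfn)   { return pfn << PAGE_SHIFT; }
static uint64_t paddr_to_pfn(uint64_t paddr) { return paddr >> PAGE_SHIFT; }
static uint64_t pfn_to_dmap(uint64_t pfn)    { return DMAP_BASE + (pfn << PAGE_SHIFT); }

int main(void)
{
    uint64_t paddr = 0x1234567;               /* some physical address */
    uint64_t pfn = paddr_to_pfn(paddr);       /* 0x1234 */
    printf("pfn        = %#lx\n", (unsigned long)pfn);
    printf("page start = %#lx\n", (unsigned long)pfn_to_paddr(pfn));
    printf("dmap vaddr = %#lx\n", (unsigned long)pfn_to_dmap(pfn));
    return 0;
}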
@@ -96,10 +96,6 @@ static inline void claim_bmem_area(struct mm_zone *zone, const struct _bmem_area
     /* only the first page in the order group is inserted into
      * the freelist, but all of them need to be initialized */
     for (u_int i = 0; i < (1u << order); i++) {
-        if (pos >= end)
-            panic("page %p out of range", pos);
-        if (atom_read(&pos->count) != 420)
-            panic("page %p double initialized\n", pos);
         atom_init(&pos->count, 0);
         atom_init(&pos->attr, 0);
@@ -158,7 +154,8 @@ void paging_init(vm_paddr_t phys_end)
     vm_paddr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
     panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
              "cannot allocate memory for the page bitmaps");
-    memset(__v(bitmap_start_phys), 0, bitmap_total_size);
+    for (int i = 0; i < (1 << bitmap_size_log2); i++)
+        __boot_clear_page(bitmap_start_phys + (i * PAGE_SIZE));
 
     /*
      * initialize the pools
@@ -192,7 +189,7 @@ void paging_init(vm_paddr_t phys_end)
         /* This is merely an optimization to simplify checking whether
          * two buddies can be coalesced into one. In reality, the
          * reference count is invalid because the page is reserved. */
-        atom_init(&vm_page_array[pfn].count, 420);
+        atom_init(&vm_page_array[pfn].count, INT_MIN);
         atom_init(&vm_page_array[pfn].attr, _PGA_RSVD_MASK);
         vm_page_array[pfn].pfn = pfn;
     }
@@ -207,12 +204,12 @@ void paging_init(vm_paddr_t phys_end)
         /* make sure the boot memory allocator cannot under any circumstances hand
          * out pages from this area anymore, even though that should be unnecessary */
         clist_del(&area->link);
 
         claim_bmem_area(zone, area);
-        zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
-        if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
-            zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
-    }
 
+        zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
+        if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
+            zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
+    }
 }
 
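The emergency threshold itself is simple arithmetic: a fixed fraction of the zone's free pages, capped at a configured maximum. A tiny sketch with invented config values (the real CFG_PAGE_EMERG_DENOM / CFG_PAGE_EMERG_MAX are not visible in this diff):

/* Illustration only: example values, not the kernel's actual config. */
#include <stdio.h>

#define CFG_PAGE_EMERG_DENOM 16   /* reserve 1/16 of free pages (assumed) */
#define CFG_PAGE_EMERG_MAX   1024 /* but never more than this many (assumed) */

static long emerg_threshold(long free_count)
{
    long emerg = free_count / CFG_PAGE_EMERG_DENOM;
    if (emerg > CFG_PAGE_EMERG_MAX)
        emerg = CFG_PAGE_EMERG_MAX;
    return emerg;
}

int main(void)
{
    printf("%ld\n", emerg_threshold(4096));    /* 256 */
    printf("%ld\n", emerg_threshold(1 << 20)); /* capped at 1024 */
    return 0;
}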
@@ -227,9 +224,18 @@ vm_page_t page_alloc(u_int order, enum mflags flags)
 {
     if (order > MM_MAX_ORDER) {
         page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
-        return nil;
+        return INVALID_PAGE;
     }
 
+    /*
+     * See if the requested zone has enough free pages for the allocation.
+     * If not, fall back to lower physical memory (i.e. use a zone with
+     * smaller index). Repeat until we either find a zone that has enough
+     * free pages, or until we've run out of zones (in which case the
+     * allocation failed). Just because we found a zone doesn't mean we've
+     * succeeded, since the pages in that zone might not be contiguous.
+     * If they're not, we have to try again (see further down below).
+     */
     struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
     long count_after;
 try_next_zone:
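The comment added above describes a simple fallback policy: start at the zone the caller asked for and walk toward lower zone indices until one has enough free pages, failing once index 0 has been exhausted. A self-contained sketch of just that selection loop (zone layout, names and numbers are invented; the kernel's real check also accounts for the emergency reserve and retries on fragmentation):

/* Sketch of falling back to lower zones until one has enough free pages. */
#include <stdio.h>

struct zone {
    const char *name;
    long free_count;
    long emerg;       /* emergency reserve, off-limits to normal allocations */
};

/* zones ordered from lowest physical memory to highest (assumed layout) */
static struct zone zones[] = {
    { "DMA",    64,   16 },
    { "NORMAL", 4096, 256 },
    { "HIGH",   0,    0 },
};

/* Start at the requested zone index and fall back toward index 0. */
static int pick_zone(int requested, long pages_wanted)
{
    for (int i = requested; i >= 0; i--) {
        if (zones[i].free_count - pages_wanted >= zones[i].emerg)
            return i;
    }
    return -1; /* out of zones: the allocation fails */
}

int main(void)
{
    int z = pick_zone(2, 8); /* HIGH is empty, falls back to NORMAL */
    printf("picked zone: %s\n", z < 0 ? "(none)" : zones[z].name);
    return 0;
}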
@@ -242,7 +248,7 @@ try_next_zone:
                 zone--;
                 goto try_next_zone;
             } else {
-                return nil;
+                return INVALID_PAGE;
             }
         }
     }
@@ -254,9 +260,9 @@ try_next_zone:
      * requested order, and if it's empty, go over to the next higher order.
      * Repeat until we found a page, or we've reached the highest order.
      */
-    vm_page_t page = nil;
+    vm_page_t page = INVALID_PAGE;
     u_int page_order = order;
-    while (page == nil && page_order < MM_NR_ORDERS) {
+    while (!page && page_order < MM_NR_ORDERS) {
         struct mm_pool *pool = &zone->pools[page_order];
 
         disable_intr();
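The loop above is the classic buddy search: start at the freelist for the requested order and walk upward until a non-empty pool is found; the split halves are later handed back to the lower-order freelists (see the clist_add a few hunks down). A minimal array-based sketch of that search-and-split idea, with invented types, no locking, and counters standing in for freelists:

/* Sketch of "search upward, then split back down" order selection. */
#include <stdio.h>

#define NR_ORDERS 11

/* how many free blocks each order currently has (made-up numbers) */
static unsigned free_blocks[NR_ORDERS] = { 0, 0, 0, 5, 2, 0, 0, 1, 0, 0, 0 };

/* returns the order handed to the caller (== wanted_order), or -1 if nothing fits */
static int take_block(unsigned wanted_order)
{
    unsigned order = wanted_order;
    while (order < NR_ORDERS && free_blocks[order] == 0)
        order++;                          /* nothing here, try a bigger block */
    if (order >= NR_ORDERS)
        return -1;

    free_blocks[order]--;                 /* grab the larger block */
    while (order > wanted_order) {
        order--;
        free_blocks[order]++;             /* give one half back as a buddy */
    }
    return (int)order;
}

int main(void)
{
    printf("got order %d block\n", take_block(1)); /* served by splitting an order-3 block */
    printf("order 1 free: %u, order 2 free: %u\n", free_blocks[1], free_blocks[2]);
    return 0;
}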
@@ -276,7 +282,7 @@ try_next_zone:
         intr_restore(cpuflags);
     }
 
-    if (page == nil) {
+    if (!page) {
         if (zone > &mm_zones[0]) {
             /*
              * If we reach this, the current zone technically had enough free
@@ -288,7 +294,7 @@ try_next_zone:
             zone--;
             goto try_next_zone;
         } else {
-            return nil;
+            return INVALID_PAGE;
         }
     }
 
@@ -312,7 +318,7 @@ try_next_zone:
 
     disable_intr();
     spin_lock(&pool->lock);
-    clist_add_first(&pool->freelist, &buddy->link);
+    clist_add(&pool->freelist, &buddy->link);
     pool->free_entries++;
     spin_unlock(&pool->lock);
     intr_restore(cpuflags);
@@ -320,7 +326,14 @@ try_next_zone:
 
     for (u_int i = 0; i < (1 << order); i++)
         pga_set_order(&page[i], order);
-    page_clear(page);
+
+    /* future versions will have a background thread that
+     * clears pages in the freelist when the cpu is idle */
+    if ((flags & _M_ZERO) && !pga_zero(page))
+        page_clear(page);
+    /* XXX only clear the zero flag when the page actually becomes dirty */
+    pga_set_zero(page, false);
 
     return page;
 }
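The new policy is lazy zeroing: a page is only cleared when the caller asks for zeroed memory (_M_ZERO) and the page is not already known to contain zeroes, and the zero attribute is dropped on allocation because the caller may dirty the page. A standalone sketch of that decision (the page struct and flag value are simplified stand-ins, not the kernel's):

/* Sketch of allocate-time lazy zeroing driven by a per-page "known zero" flag. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u
#define M_ZERO    0x1u   /* caller wants zeroed memory (assumed flag value) */

struct fake_page {
    unsigned char data[PAGE_SIZE];
    bool known_zero;     /* stand-in for the pga_zero() attribute bit */
};

static void alloc_prepare(struct fake_page *page, unsigned flags)
{
    /* only pay for the memset when it is both requested and necessary */
    if ((flags & M_ZERO) && !page->known_zero)
        memset(page->data, 0, PAGE_SIZE);
    /* conservatively assume the caller will dirty the page */
    page->known_zero = false;
}

int main(void)
{
    struct fake_page page = { .known_zero = true };
    alloc_prepare(&page, M_ZERO);   /* skips the memset: already zero */
    alloc_prepare(&page, 0);        /* no zeroing requested */
    alloc_prepare(&page, M_ZERO);   /* now it actually clears the page */
    printf("first byte: %u\n", page.data[0]);
    return 0;
}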
@@ -378,21 +391,14 @@ void page_free(vm_page_t page)
     PAGE_ASSERT((uintptr_t)ptr % ORDER_SIZE(order) == 0);
     u_long pfn = pg2pfn(page);
 
-    PAGE_DEBUG_BLOCK {
-        int old_count = atom_sub(&page->count, 1);
-        if (old_count != 1) {
-            if (old_count == 0)
-                page_debug("double free of %p", ptr);
-            else
-                page_debug("attempted to free %p with references", ptr);
-            return;
-        }
-    } else {
-        atom_dec(&page->count);
+    if (atom_dec(&page->count)) {
+        page_debug("Double free of %p", page);
+        return;
     }
 
     struct mm_zone *zone = &mm_zones[pga_zone(page)];
     latom_add(&zone->free_count, (1 << order));
+    struct mm_pool *pool = &zone->pools[order];
 
     /* try to coalesce free buddy blocks until we're reached the highest order */
     while (order < MM_MAX_ORDER) {
@@ -405,30 +411,30 @@ void page_free(vm_page_t page)
          * to avoid blocking other CPUs for longer than necessary */
         vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
         vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
-        struct mm_pool *current_order_pool = &zone->pools[order];
-        struct mm_pool *next_order_pool = &zone->pools[order + 1];
 
         disable_intr();
-        spin_lock(&zone->pools[order].lock);
+        spin_lock(&pool->lock);
         if (can_merge(page, buddy)) {
             /* remove buddy from the low order freelist */
             clist_del(&buddy->link);
-            current_order_pool->free_entries--;
+            pool->free_entries--;
+            spin_unlock(&pool->lock);
 
             pga_set_order(buddy, order + 1);
             pga_set_order(page, order + 1);
-            clist_add(&next_order_pool->freelist, &low->link);
-            next_order_pool->free_entries++;
         } else {
-            order = MM_MAX_ORDER; /* break out of the loop */
+            spin_unlock(&pool->lock);
+            intr_restore(cpuflags);
+            break;
         }
-        spin_unlock(&zone->pools[order].lock);
         intr_restore(cpuflags);
 
         page = low;
         pfn = pg2pfn(page);
         order++;
+        pool++;
     }
 
     /* finally, we need to insert the page at its freelist */
-    struct mm_pool *pool = &zone->pools[order];
     disable_intr();
     spin_lock(&pool->lock);
     clist_add(&pool->freelist, &page->link);
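For context on the coalescing loop: a block's buddy at a given order is found by flipping bit `order` of the PFN, and the merged block always starts at the lower of the two addresses, i.e. the PFN with that bit cleared. A standalone sketch of just that index arithmetic (no locking, no freelists):

/* Buddy-index arithmetic only: pfn ^ (1 << order) is the buddy,
 * pfn & ~(1 << order) is the lower (merged) block. Illustration, not kernel code. */
#include <stdio.h>

static unsigned long buddy_pfn(unsigned long pfn, unsigned order)
{
    return pfn ^ (1ul << order);
}

static unsigned long merged_pfn(unsigned long pfn, unsigned order)
{
    return pfn & ~(1ul << order);
}

int main(void)
{
    unsigned long pfn = 0x2c;                 /* 0b101100: aligned for order 2 */
    for (unsigned order = 2; order <= 4; order++) {
        printf("order %u: block %#lx buddy %#lx -> merged block %#lx\n",
               order, pfn, buddy_pfn(pfn, order), merged_pfn(pfn, order));
        pfn = merged_pfn(pfn, order);         /* continue merging upward */
    }
    return 0;
}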